add files for public distribution
based on internal repository 0a462b6 2017-11-22 14:41:39 +0100
0
pmsco/__init__.py
Normal file
17
pmsco/__main__.py
Normal file
@@ -0,0 +1,17 @@
"""
@package pmsco.__main__
__main__ module

thanks to this small module you can go to the project directory and run PMSCO like this:
@verbatim
python pmsco [pmsco-arguments]
@endverbatim
"""

import pmsco
import sys

if __name__ == '__main__':
    args, unknown_args = pmsco.parse_cli()
    pmsco.main_pmsco(args, unknown_args)
    sys.exit(0)

131
pmsco/calculator.py
Normal file
@@ -0,0 +1,131 @@
"""
@package pmsco.calculator
abstract scattering program interface.

this module declares the basic interface to scattering programs.
for each scattering program (EDAC, MSC, SSC, ...) a specific interface must be derived from Calculator.
the derived interface must implement the run() method.
the run() method and the scattering code may use only the parameters declared in the interface.

TestCalculator is provided for testing the PMSCO code quickly without calling an external program.

@author Matthias Muntwiler

@copyright (c) 2015 by Paul Scherrer Institut @n
Licensed under the Apache License, Version 2.0 (the "License"); @n
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
"""

import time
import numpy as np
import data as md
import cluster as mc

__author__ = 'matthias muntwiler'


class Calculator(object):
    """
    Interface class to the calculation program.
    """
    def run(self, params, cluster, scan, output_file):
        """
        run a calculation with the given parameters and cluster.

        the result is returned as the method result and in a file named <code>output_file + '.etpi'</code>
        or <code>output_file + '.etpai'</code>, depending on the scan mode.
        all other intermediate files are deleted unless keep_temp_files is True.

        @param params: a pmsco.project.Params() object with all necessary values except cluster and output files set.

        @param cluster: a pmsco.cluster.Cluster() object with all atom positions set.

        @param scan: a pmsco.project.Scan() object describing the experimental scanning scheme.

        @param output_file: base name for all intermediate and output files

        @return: result_file, files_cats
        @arg result_file is the name of the main ETPI or ETPAI result file to be further processed.
        @arg files_cats is a dictionary that lists the names of all created data files with their category.
            the dictionary key is the file name,
            the value is the file category (cluster, phase, etc.).
        """
        return None, None

    def check_cluster(self, cluster, output_file):
        """
        export the cluster in XYZ format for reference.

        along with the complete cluster, the method also saves cuts in the xz (extension .y.xyz) and yz (.x.xyz) plane.

        @param cluster: a pmsco.cluster.Cluster() object with all atom positions set.

        @param output_file: base name for all intermediate and output files

        @return: dictionary listing the names of the created files with their category.
            the dictionary key is the file name,
            the value is the file category (cluster).

        @warning experimental: this method may be moved elsewhere in a future version.
        """
        xyz_filename = output_file + ".xyz"
        cluster.save_to_file(xyz_filename, fmt=mc.FMT_XYZ)
        files = {xyz_filename: 'cluster'}

        clucut = mc.Cluster()
        clucut.copy_from(cluster)
        clucut.trim_slab("x", 0.0, 0.1)
        xyz_filename = output_file + ".x.xyz"
        clucut.save_to_file(xyz_filename, fmt=mc.FMT_XYZ)
        files[xyz_filename] = 'cluster'

        clucut.copy_from(cluster)
        clucut.trim_slab("y", 0.0, 0.1)
        xyz_filename = output_file + ".y.xyz"
        clucut.save_to_file(xyz_filename, fmt=mc.FMT_XYZ)
        files[xyz_filename] = 'cluster'

        return files

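# a minimal sketch of a derived interface, assuming a hypothetical external
# program "myscatter" that reads an EDAC cluster file and writes an ETPI file;
# it only illustrates the run() contract documented above.
class ExampleCalculator(Calculator):
    def run(self, params, cluster, scan, output_file):
        import subprocess
        clu_filename = output_file + ".clu"
        etpi_filename = output_file + ".etpi"
        cluster.save_to_file(clu_filename, fmt=mc.FMT_EDAC)
        subprocess.check_call(["myscatter", clu_filename, etpi_filename])
        files = {clu_filename: 'cluster', etpi_filename: 'energy'}
        return etpi_filename, files
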
class TestCalculator(Calculator):
    """
    interface class producing random data for testing the PMSCO code without calling an external program.
    """
    def run(self, params, cluster, scan, output_file):
        """
        produce a random test data set.

        the scan scheme is generated from the given parameters.
        the intensities are random values.

        @return: result_file, files_cats
            the result file contains an ETPI or ETPAI array with random intensity data.
        """

        # set up scan
        params.fixed_cluster = 'a' in scan.mode

        # generate file names
        base_filename = output_file
        clu_filename = base_filename + ".clu"
        if params.fixed_cluster:
            etpi_filename = base_filename + ".etpai"
        else:
            etpi_filename = base_filename + ".etpi"

        cluster.save_to_file(clu_filename)

        # generate data and save in ETPI or ETPAI format
        result_etpi = scan.raw_data.copy()
        result_etpi['i'] = np.random.random_sample(result_etpi.shape)

        # slow down the test for debugging
        time.sleep(5)

        md.save_data(etpi_filename, result_etpi)

        files = {clu_filename: 'cluster', etpi_filename: 'energy'}
        return etpi_filename, files

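# usage sketch: exercising TestCalculator with minimal stand-in objects.
# the _Stub scan and params objects are hypothetical; in PMSCO they come from
# the project definition (cf. the run() docstring above).
#
#   class _Stub(object):
#       pass
#
#   scan = _Stub()
#   scan.mode = ['e']
#   scan.raw_data = md.create_data((10,), datatype='ETPI')
#   scan.raw_data['e'] = np.linspace(100.0, 200.0, 10)
#
#   clu = mc.Cluster()
#   clu.set_rmax(5.0)
#   clu.add_atom(7, np.array((0.0, 0.0, 0.0)), 1)
#
#   result_file, files = TestCalculator().run(_Stub(), clu, scan, "test")
#   # result_file == "test.etpi"
#   # files == {"test.clu": 'cluster', "test.etpi": 'energy'}
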
785
pmsco/cluster.py
Normal file
@@ -0,0 +1,785 @@
"""
@package pmsco.cluster
cluster tools for MSC and EDAC

the Cluster class is provided to facilitate the construction and import/export of clusters.
a cluster can be built by adding single atoms, layers, or a half-space bulk lattice.
the class can import from/export to EDAC, MSC, and XYZ cluster files.
XYZ allows for export to 3D visualizers, e.g. Avogadro.

@pre requires the periodictable package (https://pypi.python.org/pypi/periodictable)
@code{.sh}
pip install --user periodictable
@endcode

@author Matthias Muntwiler

@copyright (c) 2015 by Paul Scherrer Institut
"""

import math
import numpy as np
import periodictable as pt

## default file format identifier
FMT_DEFAULT = 0
## MSC file format identifier
FMT_MSC = 1
## EDAC file format identifier
FMT_EDAC = 2
## XYZ file format identifier
FMT_XYZ = 3

## numpy.array datatype of Cluster.data array
DTYPE_CLUSTER_INTERNAL = [('i', 'i4'), ('t', 'i4'), ('s', 'a2'), ('x', 'f4'), ('y', 'f4'), ('z', 'f4'), ('e', 'u1')]
## file format of internal Cluster.data array
FMT_CLUSTER_INTERNAL = ["%5u", "%2u", "%s", "%7.3f", "%7.3f", "%7.3f", "%1u"]
## field (column) names of internal Cluster.data array
FIELDS_CLUSTER_INTERNAL = ['i', 't', 's', 'x', 'y', 'z', 'e']

## numpy.array datatype of cluster for MSC cluster file input/output
DTYPE_CLUSTER_MSC = [('i', 'i4'), ('x', 'f4'), ('y', 'f4'), ('z', 'f4'), ('t', 'i4')]
## file format of MSC cluster file
FMT_CLUSTER_MSC = ["%5u", "%7.3f", "%7.3f", "%7.3f", "%2u"]
## field (column) names of MSC cluster file
FIELDS_CLUSTER_MSC = ['i', 'x', 'y', 'z', 't']

## numpy.array datatype of cluster for EDAC cluster file input/output
DTYPE_CLUSTER_EDAC = [('i', 'i4'), ('t', 'i4'), ('x', 'f4'), ('y', 'f4'), ('z', 'f4')]
## file format of EDAC cluster file
FMT_CLUSTER_EDAC = ["%5u", "%2u", "%7.3f", "%7.3f", "%7.3f"]
## field (column) names of EDAC cluster file
FIELDS_CLUSTER_EDAC = ['i', 't', 'x', 'y', 'z']

## numpy.array datatype of cluster for XYZ file input/output
DTYPE_CLUSTER_XYZ = [('s', 'a2'), ('x', 'f4'), ('y', 'f4'), ('z', 'f4')]
## file format of XYZ cluster file
FMT_CLUSTER_XYZ = ["%s", "%10.5f", "%10.5f", "%10.5f"]
## field (column) names of XYZ cluster file
FIELDS_CLUSTER_XYZ = ['s', 'x', 'y', 'z']


class Cluster(object):
    """
    Represents a cluster of atoms by their coordinates and chemical element.

    the object stores the following information per atom in the @ref data array:

    - sequential atom index (1-based)
    - atom type (chemical element number)
    - chemical element symbol
    - x coordinate of the atom position
    - y coordinate of the atom position
    - z coordinate of the atom position
    - emitter flag

    the class also defines methods that add or manipulate atoms of the cluster.
    see most importantly the set_rmax, add_atom, add_layer and add_bulk functions.
    emitters can be flagged by the set_emitter method.

    you may also manipulate the data array directly.
    in this case, be sure to keep the data array consistent.
    the update methods can help to recreate the index, atom type or symbol columns.

    the class can also load and save files in some simple formats.
    """

    ## @var rmax
    # maximum distance of atoms from the origin.
    #
    # float, default = 0
    #
    # this parameter restricts the addition of new atoms.
    # changing the parameter does not affect existing atoms.
    # the default is 0 (no atom will be added!).
    # you must set this parameter explicitly!

    ## @var dtype
    # data type of the internal numpy.ndarray.

    ## @var file_format
    # default file format.
    #
    # must be one of the FMT_MSC, FMT_EDAC, FMT_XYZ constants.
    # the initial value is FMT_XYZ.

    ## @var data
    # structured numpy array holding the atom positions.
    #
    # the columns of the array are:
    # @arg @c 'i' (int) atom index (1-based)
    # @arg @c 't' (int) atom type (chemical element number)
    # @arg @c 's' (string) chemical element symbol
    # @arg @c 'x' (float32) x coordinate of the atom position
    # @arg @c 'y' (float32) y coordinate of the atom position
    # @arg @c 'z' (float32) z coordinate of the atom position
    # @arg @c 'e' (uint8) 1 = emitter, 0 = regular atom

    ## @var comment (str)
    # one-line comment that can be included in some cluster files

    def __init__(self):
        self.data = None
        self.rmax = 0.0
        self.dtype = DTYPE_CLUSTER_INTERNAL
        self.file_format = FMT_XYZ
        self.comment = ""
        self.clear()

    def clear(self):
        """
        Remove all atoms from the cluster.
        """
        n_atoms = 0
        self.data = np.zeros(n_atoms, dtype=self.dtype)

    def copy_from(self, cluster):
        """
        Copy the data from another cluster.

        @param cluster (Cluster): other Cluster object.
        """
        self.data = cluster.data.copy()

    def set_rmax(self, r):
        """
        set rmax, the maximum distance of atoms from the origin.

        atoms with norm greater than rmax will not be added to the cluster
        by the add_layer() and add_bulk() methods.
        existing atoms are not affected when changing rmax.

        you must set this parameter explicitly, as the default value is 0
        (no atom will be added)!
        """
        self.rmax = r

    def build_element(self, index, element_number, x, y, z, emitter):
        """
        build a tuple in the format of the internal data array.

        @param index: (int) index

        @param element_number: (int) chemical element number

        @param x, y, z: (float) atom coordinates in the cluster

        @param emitter: (uint) 1 = emitter, 0 = regular
        """
        symbol = pt.elements[element_number].symbol
        element = (index, element_number, symbol, x, y, z, emitter)
        return element

    def add_atom(self, atomtype, v_pos, is_emitter):
        """
        add a single atom to the cluster.

        @param atomtype: (int) chemical element number

        @param v_pos: (numpy.ndarray, shape = (3)) position vector

        @param is_emitter: (uint) 1 = emitter, 0 = regular
        """
        n0 = self.data.shape[0] + 1
        element = self.build_element(n0, atomtype, v_pos[0], v_pos[1], v_pos[2], is_emitter)
        self.data = np.append(self.data, np.array(element, dtype=self.data.dtype))

    def add_layer(self, atomtype, v_pos, v_lat1, v_lat2):
        """
        add a layer of atoms to the cluster.

        the layer is expanded up to the limit given by
        self.rmax (maximum distance from the origin).
        all atoms are non-emitters.

        @param atomtype: (int) chemical element number

        @param v_pos: (numpy.ndarray, shape = (3))
            position vector of the first atom (basis vector)

        @param v_lat1, v_lat2: (numpy.ndarray, shape = (3))
            lattice vectors.
        """
        r_great = max(self.rmax, np.linalg.norm(v_pos))
        n0 = self.data.shape[0] + 1
        n1 = max(int(r_great / np.linalg.norm(v_lat1)) + 1, 3) * 2
        n2 = max(int(r_great / np.linalg.norm(v_lat2)) + 1, 3) * 2
        nn = 0
        buf = np.empty((2 * n1 + 1) * (2 * n2 + 1), dtype=self.dtype)
        for i1 in range(-n1, n1 + 1):
            for i2 in range(-n2, n2 + 1):
                v = v_pos + v_lat1 * i1 + v_lat2 * i2
                if np.linalg.norm(v) <= self.rmax:
                    buf[nn] = self.build_element(nn + n0, atomtype, v[0], v[1], v[2], 0)
                    nn += 1
        buf = np.resize(buf, nn)
        self.data = np.append(self.data, buf)

    def add_bulk(self, atomtype, v_pos, v_lat1, v_lat2, v_lat3, z_surf=0.0):
        """
        add bulk atoms to the cluster.

        the lattice is expanded up to the limits given by
        self.rmax (maximum distance from the origin)
        and z_surf (position of the surface).
        all atoms are non-emitters.

        @param atomtype: (int) chemical element number

        @param v_pos: (numpy.ndarray, shape = (3))
            position vector of the first atom (basis vector)

        @param v_lat1, v_lat2, v_lat3: (numpy.ndarray, shape = (3))
            lattice vectors.

        @param z_surf: (float) position of surface.
            atoms with z > z_surf are not added.
        """
        r_great = max(self.rmax, np.linalg.norm(v_pos))
        n0 = self.data.shape[0] + 1
        n1 = max(int(r_great / np.linalg.norm(v_lat1)) + 1, 4) * 3
        n2 = max(int(r_great / np.linalg.norm(v_lat2)) + 1, 4) * 3
        n3 = max(int(r_great / np.linalg.norm(v_lat3)) + 1, 4) * 3
        nn = 0
        buf = np.empty((2 * n1 + 1) * (2 * n2 + 1) * (n3 + 1), dtype=self.dtype)
        for i1 in range(-n1, n1 + 1):
            for i2 in range(-n2, n2 + 1):
                for i3 in range(-n3, n3 + 1):
                    v = v_pos + v_lat1 * i1 + v_lat2 * i2 + v_lat3 * i3
                    if np.linalg.norm(v) <= self.rmax and v[2] <= z_surf:
                        buf[nn] = self.build_element(nn + n0, atomtype, v[0], v[1], v[2], 0)
                        nn += 1
        buf = np.resize(buf, nn)
        self.data = np.append(self.data, buf)

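    # usage sketch, assuming a hypothetical simple cubic lattice with a = 3.0:
    #
    #   clu = Cluster()
    #   clu.set_rmax(8.0)
    #   a = 3.0
    #   clu.add_layer(8, np.zeros(3), np.array((a, 0.0, 0.0)), np.array((0.0, a, 0.0)))
    #   clu.add_bulk(26, np.array((0.0, 0.0, -a / 2)),
    #                np.array((a, 0.0, 0.0)), np.array((0.0, a, 0.0)), np.array((0.0, 0.0, a)))
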
    def add_cluster(self, cluster, check_rmax=False, check_unique=False, tol=0.001):
        """
        add atoms from another cluster object.

        @note the order of atoms in the internal data array may change during this operation.
        the atom index is updated.

        @param cluster: Cluster object to be added.

        @param check_rmax: if True, atoms outside self.rmax are not added.
            if False (default), all atoms of the other cluster are added.

        @param check_unique: if True, atoms occupying the same position as an existing atom will not be added.
            if False (default), all atoms are added even if they occupy the same position.

        @param tol: tolerance for checking uniqueness.
            positions of two atoms are considered equal if all coordinates lie within the tolerance interval.

        @return: None
        """
        assert isinstance(cluster, Cluster)
        data = self.data.copy()
        source = cluster.data.copy()

        if check_rmax and source.shape[0] > 0:
            source_xyz = source[['x', 'y', 'z']].copy()
            source_xyz = source_xyz.view((source_xyz.dtype[0], len(source_xyz.dtype.names)))
            b_rmax = np.linalg.norm(source_xyz, axis=1) <= self.rmax
            idx = np.where(b_rmax)
            source = source[idx]
        data = np.append(data, source)

        if check_unique and data.shape[0] > 0:
            data_xyz = data[['x', 'y', 'z']].copy()
            data_xyz = data_xyz.view((data_xyz.dtype[0], len(data_xyz.dtype.names)))
            tol_xyz = np.round(data_xyz / tol)
            uni_xyz = tol_xyz.view(tol_xyz.dtype.descr * 3)
            _, idx = np.unique(uni_xyz, return_index=True)
            data = data[np.sort(idx)]

        self.data = data
        self.update_index()

    def get_z_layers(self, tol=0.001):
        """
        return the z-coordinates of atomic layers.
        the layers are stacked in the z-direction.

        the function gathers unique z-coordinates.
        coordinates which are within the given tolerance are assigned to the same layer.

        @param tol: tolerance
        @return: (numpy.ndarray) z-coordinates of the layers.
            the coordinates are numerically ordered, the top layer appears last.
            the returned coordinates may not be identical to any atom coordinate of a layer
            but deviate up to the given tolerance.
        """
        self_z = self.data['z'].view(np.float32).reshape(self.data.shape)
        z2 = np.round(self_z.copy() / tol)
        layers = np.unique(z2) * tol
        return layers

    def relax(self, z_cut, z_shift, element=0):
        """
        shift atoms below a certain z coordinate by a fixed distance in the z direction.

        @param z_cut: atoms below this z coordinate are shifted.
        @param z_shift: amount of shift in z direction
            (positive to move towards the surface, negative to move into the bulk).
        @param element: (int) chemical element number if atoms of a specific element should be affected.
            by default (element = 0), all atoms are moved.
        @return: (numpy.ndarray) indices of the atoms that have been shifted.
        """
        self_z = self.data['z'].view(np.float32).reshape(self.data.shape)
        b_z = self_z <= z_cut
        b_all = b_z

        if element:
            try:
                b_el = self.data['t'] == int(element)
            except ValueError:
                b_el = self.data['s'] == element
            b_all = np.all([b_z, b_el], axis=0)

        idx = np.where(b_all)
        self.data['z'][idx] += z_shift

        return idx

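    # usage sketch: contract all layers below the topmost one by 0.05,
    # using get_z_layers() to find the cutoff coordinate:
    #
    #   layers = clu.get_z_layers()
    #   clu.relax(layers[-2] + 0.01, -0.05)
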
    def matrix_transform(self, matrix):
        """
        apply a transformation matrix to each atom of the cluster.

        the transformed atom positions are calculated as v = R * transpose(v)

        @param matrix: transformation matrix

        @return: None
        """
        for atom in self.data:
            v = np.matrix([atom['x'], atom['y'], atom['z']])
            w = matrix * v.transpose()
            atom['x'] = float(w[0])
            atom['y'] = float(w[1])
            atom['z'] = float(w[2])

    def rotate_x(self, angle):
        """
        rotate cluster about the x-axis.

        @param angle (float) in degrees
        """
        angle = math.radians(angle)
        s = math.sin(angle)
        c = math.cos(angle)
        matrix = np.matrix([[1, 0, 0], [0, c, -s], [0, s, c]])
        self.matrix_transform(matrix)

    def rotate_y(self, angle):
        """
        rotate cluster about the y-axis.

        @param angle (float) in degrees
        """
        angle = math.radians(angle)
        s = math.sin(angle)
        c = math.cos(angle)
        matrix = np.matrix([[c, 0, s], [0, 1, 0], [-s, 0, c]])
        self.matrix_transform(matrix)

    def rotate_z(self, angle):
        """
        rotate cluster about the z-axis (surface normal).

        @param angle (float) in degrees
        """
        angle = math.radians(angle)
        s = math.sin(angle)
        c = math.cos(angle)
        matrix = np.matrix([[c, -s, 0], [s, c, 0], [0, 0, 1]])
        self.matrix_transform(matrix)

    def find_positions(self, pos, tol=0.001):
        """
        find all atoms which occupy a given position.

        @param pos: (numpy.array, shape = (3)) position vector.

        @param tol: (float) matching tolerance per coordinate.

        @return numpy.array of indices which match pos.
        """
        b2 = np.abs(pos - self.get_positions()) < tol
        b1 = np.all(b2, axis=1)
        idx = np.where(b1)
        return idx[0]

    def find_index_cylinder(self, pos, r_xy, r_z, element):
        """
        find atoms of a given element within a cylindrical volume and return their indices.

        @param pos: (numpy.array, shape = (3)) center position of the cylinder.

        @param r_xy: (float) radius of the cylinder.
            returned atoms must match |atom(x,y) - pos(x,y)| <= r_xy.

        @param r_z: (float) half height of the cylinder.
            returned atoms must match |atom(z) - pos(z)| <= r_z.

        @param element: (str or int) element symbol or atomic number.
            if None, the element is not checked.

        @return numpy.array of indices of the matching atoms.
        """
        pos_xy = pos[0:2]
        self_xy = self.data[['x', 'y']].copy()
        self_xy = self_xy.view((self_xy.dtype[0], len(self_xy.dtype.names)))
        b_xy = np.linalg.norm(self_xy - pos_xy, axis=1) <= r_xy

        pos_z = pos[2]
        self_z = self.data['z']
        b_z = np.abs(self_z - pos_z) <= r_z

        if element is not None:
            try:
                b_el = self.data['t'] == int(element)
            except ValueError:
                b_el = self.data['s'] == element
            b_all = np.all([b_xy, b_z, b_el], axis=0)
        else:
            b_all = np.all([b_xy, b_z], axis=0)

        idx = np.where(b_all)
        return idx[0]

    def trim_cylinder(self, r_xy, r_z):
        """
        remove atoms outside a given cylinder.

        the cylinder is centered at the origin.

        @param r_xy: (float) radius of the cylinder.
            atoms to keep must match |atom(x,y)| <= r_xy.

        @param r_z: (float) half height of the cylinder.
            atoms to keep must match |atom(z)| <= r_z.

        @return: None
        """
        self_xy = self.data[['x', 'y']].copy()
        self_xy = self_xy.view((self_xy.dtype[0], len(self_xy.dtype.names)))
        b_xy = np.linalg.norm(self_xy, axis=1) <= r_xy

        self_z = self.data['z']
        b_z = np.abs(self_z) <= r_z

        b_all = np.all([b_xy, b_z], axis=0)
        idx = np.where(b_all)
        self.data = self.data[idx]
        self.update_index()

    def trim_sphere(self, radius):
        """
        remove atoms outside a given sphere.

        the sphere is centered at the origin.

        @param radius: (float) radius of the sphere.
            atoms to keep must match |atom(x,y,z)| <= radius.

        @return: None
        """
        self_xyz = self.data[['x', 'y', 'z']].copy()
        self_xyz = self_xyz.view((self_xyz.dtype[0], len(self_xyz.dtype.names)))
        b_xyz = np.linalg.norm(self_xyz, axis=1) <= radius
        idx = np.where(b_xyz)
        self.data = self.data[idx]
        self.update_index()

    def trim_slab(self, axis, center, depth):
        """
        remove atoms outside a slab that is parallel to one of the coordinate planes.

        @param axis: axis to trim: 'x', 'y' or 'z'.
        @param center: center position of the slab.
        @param depth: thickness of the slab.

        @return: None
        """
        coord = self.data[axis].view(np.float32).reshape(self.data.shape)
        sel = np.abs(coord - center) <= depth / 2
        idx = np.where(sel)
        self.data = self.data[idx]
        self.update_index()

    def set_emitter(self, pos=None, idx=-1, tol=0.001):
        """
        select an atom as emitter.

        the emitter atom can be specified by position or index.
        either one of the pos or idx arguments must be specified.

        @param idx: (int) array index of the atom.

        @param pos: (numpy.array, shape = (3)) position vector.

        @param tol: (float) matching tolerance per component if pos argument is used.

        @raise IndexError if the position cannot be found
        """
        if pos is not None:
            ares = self.find_positions(pos, tol)
            idx = ares[0]
        item = self.data[idx]
        item['e'] = 1

    def move_to_first(self, pos=None, idx=0, tol=0.001):
        """
        move an atom to the first position.

        the emitter atom can be specified by position or index.
        either one of the pos or idx arguments must be specified.

        @param idx: (int) array index of the atom.
            must be greater than 0 to have an effect.

        @param pos: (numpy.array, shape = (3)) position vector.

        @param tol: (float) matching tolerance per component if pos argument is used.

        @raise IndexError if the position cannot be found
        """

        if pos is not None:
            ares = self.find_positions(pos, tol)
            idx = ares[0]
        if idx:
            em = self.data[idx]
            self.data = np.delete(self.data, idx)
            self.data = np.insert(self.data, 0, em)
            self.update_index()

    def get_positions(self):
        """
        get an array of the atom coordinates.

        the returned array is an independent copy of the original data.
        changes will not affect the original cluster.

        @return numpy.ndarray, shape = (N, 3)
        """
        pos = self.data[['x', 'y', 'z']].copy()
        pos = pos.view((pos.dtype[0], len(pos.dtype.names)))
        return pos

    def set_positions(self, positions):
        """
        set atom coordinates from an array of shape (N, 3).

        this method can be used on a modified array obtained from get_positions.
        N must be the number of atoms defined in the cluster.

        @param positions: numpy.ndarray of shape (N, 3) where N is the number of atoms in this cluster.

        @return: None

        @raise AssertionError if the array sizes do not match.
        """
        assert isinstance(positions, np.ndarray)
        assert positions.shape == (self.data.shape[0], 3)
        self.data['x'] = positions[:, 0]
        self.data['y'] = positions[:, 1]
        self.data['z'] = positions[:, 2]

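    # usage sketch: shift the whole cluster by 0.5 along z through a
    # get_positions/set_positions round trip:
    #
    #   pos = clu.get_positions()
    #   pos[:, 2] += 0.5
    #   clu.set_positions(pos)
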
    def get_position(self, index):
        """
        get the position of a single atom.

        @param index: (int) index of the atom.

        @return numpy.array, shape = (3): position vector.
            the array instance is independent from the original array.
        """
        rec = self.data[index]
        return np.array((rec['x'], rec['y'], rec['z']))

    def get_atom_count(self):
        """
        get the number of atoms (positions) in the cluster.

        @return the number of atoms in the cluster.
        """
        return self.data.shape[0]

    def get_atomtype(self, index):
        """
        get the chemical element number of an atom.

        @param index: (int) index of the atom.

        @return int: chemical element number.
        """
        rec = self.data[index]
        return rec['t']

    def get_symbol(self, index):
        """
        get the chemical element symbol of an atom.

        @param index: (int) index of the atom.

        @return string: chemical element symbol.
        """
        rec = self.data[index]
        return rec['s']

    def get_emitters(self):
        """
        get a list of all emitters.

        @return list of tuples (x, y, z, atomtype)
        """
        idx = self.data['e'] != 0
        ems = self.data[['x', 'y', 'z', 't']][idx]
        return map(tuple, ems)

    def get_emitter_count(self):
        """
        get the number of emitters in the cluster.

        @return the number of atoms marked as emitter.
        """
        idx = self.data['e'] != 0
        return np.sum(idx)

    def load_from_file(self, f, fmt=FMT_DEFAULT):
        """
        load a cluster from a file created by the scattering program.

        @param f (string/handle): path name or open file handle of the cluster file.

        @param fmt (int): file format.
            must be one of the FMT_ constants.
            if FMT_DEFAULT, self.file_format is used.

        @remark if the filename ends in .gz, the file is loaded from compressed gzip format
        """
        if fmt == FMT_DEFAULT:
            fmt = self.file_format

        if fmt == FMT_MSC:
            dtype = DTYPE_CLUSTER_MSC
            fields = FIELDS_CLUSTER_MSC
            sh = 0
        elif fmt == FMT_EDAC:
            dtype = DTYPE_CLUSTER_EDAC
            fields = FIELDS_CLUSTER_EDAC
            sh = 1
        elif fmt == FMT_XYZ:
            dtype = DTYPE_CLUSTER_XYZ
            fields = FIELDS_CLUSTER_XYZ
            sh = 2
        else:
            dtype = DTYPE_CLUSTER_XYZ
            fields = FIELDS_CLUSTER_XYZ
            sh = 2

        data = np.genfromtxt(f, dtype=dtype, skip_header=sh)
        self.data = np.empty(data.shape, dtype=self.dtype)
        self.data['x'] = data['x']
        self.data['y'] = data['y']
        self.data['z'] = data['z']
        if 'i' in fields:
            self.data['i'] = data['i']
        else:
            self.update_index()
        if 't' in fields:
            self.data['t'] = data['t']
        if 's' in fields:
            self.data['s'] = data['s']
        else:
            self.update_symbols()
        if 't' not in fields:
            self.update_atomtypes()
        if 'e' in fields:
            self.data['e'] = data['e']
        else:
            self.data['e'] = 0

        pos = self.get_positions()
        # note: np.linalg.norm does not accept the axis argument in version 1.7
        # (check np.version.version)
        norm = np.sqrt(np.sum(pos**2, axis=1))
        self.rmax = np.max(norm)

    def update_symbols(self):
        """
        update element symbols from element numbers.

        if you have modified the element numbers in the self.data array directly,
        this method updates the symbol column to make the data consistent.
        """
        for atom in self.data:
            atom['s'] = pt.elements[atom['t']].symbol

    def update_atomtypes(self):
        """
        update element numbers from element symbols.

        if you have modified the element symbols in the self.data array directly,
        this method updates the atom type column to make the data consistent.
        """
        for atom in self.data:
            atom['t'] = pt.elements.symbol(atom['s'].strip()).number

    def update_index(self):
        """
        update the index column.

        if you have modified the order or number of elements in the self.data array directly,
        you may need to re-index the atoms if your code uses functions that rely on the index.

        @return: None
        """
        self.data['i'] = np.arange(1, self.data.shape[0] + 1)

def save_to_file(self, f, fmt=FMT_DEFAULT, comment=""):
|
||||
"""
|
||||
save the cluster to a file which can be read by the scattering program.
|
||||
|
||||
the method updates the atom index because some file formats require an index column.
|
||||
|
||||
@param f: (string/handle) path name or open file handle of the cluster file.
|
||||
|
||||
@param fmt: (int) file format.
|
||||
must be one of the FMT_ constants.
|
||||
if FMT_DEFAULT, self.file_format is used.
|
||||
|
||||
@param comment: (str) comment line (second line) in XYZ file.
|
||||
not used in other file formats.
|
||||
by default, self.comment is used.
|
||||
|
||||
@remark if the filename ends in .gz, the file is saved in compressed gzip format
|
||||
"""
|
||||
if fmt == FMT_DEFAULT:
|
||||
fmt = self.file_format
|
||||
|
||||
if not comment:
|
||||
comment = self.comment
|
||||
|
||||
if fmt == FMT_MSC:
|
||||
file_format = FMT_CLUSTER_MSC
|
||||
fields = FIELDS_CLUSTER_MSC
|
||||
header = ""
|
||||
elif fmt == FMT_EDAC:
|
||||
file_format = FMT_CLUSTER_EDAC
|
||||
fields = FIELDS_CLUSTER_EDAC
|
||||
header = "%u l(A)" % (self.data.shape[0])
|
||||
elif fmt == FMT_XYZ:
|
||||
file_format = FMT_CLUSTER_XYZ
|
||||
fields = FIELDS_CLUSTER_XYZ
|
||||
header = "{0}\n{1}".format(self.data.shape[0], comment)
|
||||
else:
|
||||
file_format = FMT_CLUSTER_XYZ
|
||||
fields = FIELDS_CLUSTER_XYZ
|
||||
header = "{0}\n{1}".format(self.data.shape[0], comment)
|
||||
|
||||
self.update_index()
|
||||
data = self.data[fields]
|
||||
np.savetxt(f, data, fmt=file_format, header=header, comments="")
|
||||
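# usage sketch (assumed lattice constant): build a small copper cluster
# and export it in XYZ and EDAC formats.
if __name__ == "__main__":
    clu = Cluster()
    clu.set_rmax(6.0)
    a = 3.61  # assumed cubic lattice constant in Angstrom
    clu.add_bulk(29, np.zeros(3),
                 np.array((a, 0.0, 0.0)),
                 np.array((0.0, a, 0.0)),
                 np.array((0.0, 0.0, a)))
    clu.set_emitter(idx=0)
    clu.comment = "example cluster"
    clu.save_to_file("example.xyz", fmt=FMT_XYZ)
    clu.save_to_file("example.clu", fmt=FMT_EDAC)
    print(clu.get_atom_count())
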
840
pmsco/data.py
Normal file
@@ -0,0 +1,840 @@
"""
@package pmsco.data
import, export, evaluation of MSC data

@author Matthias Muntwiler

@copyright (c) 2015 by Paul Scherrer Institut @n
Licensed under the Apache License, Version 2.0 (the "License"); @n
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
"""

import os
import logging
import numpy as np
import scipy.optimize as so
import loess.loess as loess

logger = logging.getLogger(__name__)

## energy, intensity
DTYPE_EI = [('e', 'f4'), ('i', 'f4')]
## energy, theta, phi, intensity
DTYPE_ETPI = [('e', 'f4'), ('t', 'f4'), ('p', 'f4'), ('i', 'f4')]
## energy, theta, phi, intensity, sigma (standard deviation)
DTYPE_ETPIS = [('e', 'f4'), ('t', 'f4'), ('p', 'f4'), ('i', 'f4'), ('s', 'f4')]
## energy, theta, phi, alpha, intensity
DTYPE_ETPAI = [('e', 'f4'), ('t', 'f4'), ('p', 'f4'), ('a', 'f4'), ('i', 'f4')]
## energy, theta, phi, alpha, intensity, sigma (standard deviation)
DTYPE_ETPAIS = [('e', 'f4'), ('t', 'f4'), ('p', 'f4'), ('a', 'f4'), ('i', 'f4'), ('s', 'f4')]
## theta, phi
DTYPE_TP = [('t', 'f4'), ('p', 'f4')]
## theta, phi, intensity
DTYPE_TPI = [('t', 'f4'), ('p', 'f4'), ('i', 'f4')]
## theta, phi, intensity, sigma (standard deviation)
DTYPE_TPIS = [('t', 'f4'), ('p', 'f4'), ('i', 'f4'), ('s', 'f4')]

DTYPES = {'EI': DTYPE_EI, 'ETPI': DTYPE_ETPI, 'ETPIS': DTYPE_ETPIS, 'ETPAI': DTYPE_ETPAI, 'ETPAIS': DTYPE_ETPAIS,
          'TP': DTYPE_TP, 'TPI': DTYPE_TPI, 'TPIS': DTYPE_TPIS, }
DATATYPES = DTYPES.keys()

## supported scan types
# @arg @c 'E' energy
# @arg @c 'EA' energy - alpha (analyser)
# @arg @c 'ET' energy - theta
# @arg @c 'TP' theta - phi (holo scan)
SCANTYPES = ['E', 'EA', 'ET', 'TP']


def create_etpi(shape, sigma_column=True):
    """
    create an ETPI array of a given size.

    an ETPI array is a numpy structured array.
    the array is initialized with zeroes.

    @param shape (tuple) shape of the array
    """
    if sigma_column:
        data = np.zeros(shape, dtype=DTYPE_ETPIS)
    else:
        data = np.zeros(shape, dtype=DTYPE_ETPI)
    return data

def create_data(shape, datatype='', dtype=None):
    """
    create a data array of a given size and type.

    a data array is a numpy structured array.
    the array is initialized with zeroes.
    either datatype or dtype must be specified; dtype takes precedence.

    @param shape (tuple) shape of the array, only scalars (1-tuples) supported currently
    @param datatype see DATATYPES
    @param dtype see DTYPES
    """
    if not dtype:
        dtype = DTYPES[datatype]
    data = np.zeros(shape, dtype=dtype)
    return data

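# usage sketch: an empty 100-point energy scan with a sigma column.
#
#   scan = create_data((100,), datatype='ETPIS')
#   scan['e'] = np.linspace(50.0, 150.0, 100)
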
def load_plt(filename, int_column=-1):
    """
    loads ETPI data from an MSC output (plt) file.

    plt file format:
    5-9 columns, space or tab delimited
    column 0: energy
    column 1: momentum
    column 2: theta
    column 3: phi
    columns 4-8: intensities
    comment lines must start with # character

    filename: path or name of the file to be read

    int_column: index of the column to be read as intensity
        typical values: 4, 5, 6, 7, 8
        or negative: -1 (last), -2 (second last), ...
        default: -1

    returns a structured one-dimensional numpy.ndarray

    data[i]['e'] = energy
    data[i]['t'] = theta
    data[i]['p'] = phi
    data[i]['i'] = selected intensity column
    """
    data = np.genfromtxt(filename, usecols=(0, 2, 3, int_column), dtype=DTYPE_ETPI)
    sort_data(data)
    return data

def load_edac_pd(filename, int_column=-1, energy=0.0, theta=0.0, phi=0.0, fixed_cluster=False):
    """
    load ETPI or ETPAI data from an EDAC PD output file.

    EDAC file format:
    @arg row 0: "--- scan PD"
    @arg row 1: column names
    @arg rows 2 and following: space delimited data

    @arg first columns (up to 3): energy, theta, phi depending on scan
    @arg last columns (arbitrary number): intensity at the recursion order specified in the header

    @param filename: path or name of the file to be read

    @param int_column: index of the column to be read as intensity.
        typical values: -1 (last), -2 (second last), ...
        default: -1

    @param energy: default value if energy column is missing
    @param theta: default value if theta column is missing
    @param phi: default value if phi column is missing

    @param fixed_cluster:
        if True, (theta, phi) are mapped to (alpha, phi). theta is copied from the function argument.
        if False, angles are copied literally.

    @return a structured one-dimensional numpy.ndarray (ETPI or ETPAI)

    @verbatim
    data[i]['e'] = energy
    data[i]['t'] = theta
    data[i]['p'] = phi
    data[i]['i'] = selected intensity column
    @endverbatim
    """
    with open(filename, 'r') as f:
        header1 = f.readline().strip()
        header2 = f.readline().strip()
    if not header1 == '--- scan PD':
        logger.warning("unexpected EDAC output file header format")

    col_names = header2.split()
    dtype = []
    cols = []
    ncols = 0
    for name in col_names:
        if name == "eV":
            dtype.append(('e', 'f4'))
            cols.append(ncols)
            ncols += 1
        elif name == "theta":
            dtype.append(('t', 'f4'))
            cols.append(ncols)
            ncols += 1
        elif name == "phi":
            dtype.append(('p', 'f4'))
            cols.append(ncols)
            ncols += 1
        elif name == "order":
            dtype.append(('i', 'f4'))
            cols.append(int_column)
            ncols += 1
            break
        else:
            logger.warning("unexpected EDAC output file column name")
            break
    cols = tuple(cols)
    raw = np.genfromtxt(filename, usecols=cols, dtype=dtype, skip_header=2)

    if fixed_cluster:
        etpi = np.empty(raw.shape, dtype=DTYPE_ETPAI)
    else:
        etpi = np.empty(raw.shape, dtype=DTYPE_ETPI)

    if 'eV' in col_names:
        etpi['e'] = raw['e']
    else:
        etpi['e'] = energy
    if 'theta' in col_names:
        etpi['t'] = raw['t']
    else:
        etpi['t'] = theta
    if 'phi' in col_names:
        etpi['p'] = raw['p']
    else:
        etpi['p'] = phi
    etpi['i'] = raw['i']

    if fixed_cluster:
        etpi['a'] = etpi['t']
        etpi['t'] = theta

    sort_data(etpi)
    return etpi

def load_etpi(filename):
    """
    loads ETPI or ETPIS data from a text file.

    etpi file format:
    4 or 5 columns, space or tab delimited
    column 0: energy
    column 1: theta
    column 2: phi
    column 3: intensity
    column 4: sigma error (standard deviation). optional, defaults to 0.
    comment lines must start with # character
    comment lines may appear anywhere, and are ignored

    filename: path or name of the file to be read.
        load_etpi handles compressed files (ending .gz) transparently.

    returns a structured one-dimensional numpy.ndarray

    data[i]['e'] = energy
    data[i]['t'] = theta
    data[i]['p'] = phi
    data[i]['i'] = intensity
    data[i]['s'] = sigma

    @deprecated new code should use load_data().
    """
    try:
        data = np.loadtxt(filename, dtype=DTYPE_ETPIS)
    except IndexError:
        data = np.loadtxt(filename, dtype=DTYPE_ETPI)
    sort_data(data)
    return data

def load_data(filename, dtype=None):
    """
    load column data (ETPI, and the like) from a text file.

    the extension must specify one of DATATYPES (case insensitive)
    corresponding to the meaning of the columns in the file.

    @param filename

    @param dtype: override data type recognition if the extension cannot be used.
        must be one of the data.DTYPE constants
        DTYPE_EI, DTYPE_ETPI, DTYPE_ETPIS, DTYPE_ETPAI, or DTYPE_ETPAIS.
        by default, the function uses the extension to determine the data type.
        the actual type can be read from the dtype attribute of the returned array.

    @return one-dimensional numpy structured ndarray with data
    """
    if not dtype:
        (root, ext) = os.path.splitext(filename)
        datatype = ext[1:].upper()
        dtype = DTYPES[datatype]

    data = np.loadtxt(filename, dtype=dtype)
    sort_data(data)
    return data

def save_data(filename, data):
    """
    save column data (ETPI, and the like) to a text file.

    the extension must specify one of DATATYPES (case insensitive)
    corresponding to the meaning of the columns in the file.

    @param filename

    @param data ETPI-like structured numpy.ndarray.

    @remark this function is plain numpy.savetxt, provided for convenience.
    """
    np.savetxt(filename, data, fmt='%g')

def sort_data(data):
    """
    sort scan data (ETPI and the like) in a consistent order.

    the function sorts the data array along the scan dimensions energy, theta, phi and alpha.
    this function should be used for all sorting of measured and calculated data
    to ensure a consistent sort order.

    the function determines the sort key based on the scan fields of the data array,
    ignoring the intensity and sigma fields.

    the function uses the _mergesort_ algorithm which preserves the relative order of indistinct elements.

    @warning sorting on intensity and sigma fields would mix up the scan dimensions and produce invalid results!

    @param data ETPI-like structured numpy.ndarray.

    @return: None. the data array is sorted in place.
    """
    sort_key = [name for name in data.dtype.names if name in {'e', 't', 'p', 'a'}]
    data.sort(kind='mergesort', order=sort_key)

def restructure_data(data, dtype=DTYPE_ETPAIS, defaults=None):
    """
    restructure the type of a data array by adding or removing columns.

    example: to combine an ETPI and an ETPAI scan, both arrays must have the same data type.
    this function adds the necessary columns and initializes them with default values.
    to find out the appropriate data type, use the common_dtype() function.
    to concatenate arrays, call numpy.hstack on a tuple of arrays.

    @param data: original data array (a structured numpy array having one of the DTYPES data types).

    @param dtype: data type of the new array. must be one out of DTYPES.
        default is DTYPE_ETPAIS which includes any possible field.

    @param defaults: default values for new fields.
        this must be a dictionary where the key is the field name and value the default value of the field.
        the dictionary can contain an arbitrary sub-set of fields.
        undefined fields are initialized to zero.
        if the parameter is unspecified, all fields are initialized to zero.

    @return: re-structured numpy array
    """
    new_data = np.zeros(data.shape, dtype=dtype)
    fields = [dt[0] for dt in dtype if dt[0] in data.dtype.names]

    if defaults is not None:
        for field, value in defaults.iteritems():
            if field in new_data.dtype.names:
                new_data[field] = value

    for field in fields:
        new_data[field] = data[field]

    return new_data

def common_dtype(scans):
    """
    determine the common data type for a number of scans.

    example: to combine an ETPI and an ETPAI scan, both arrays must have the same data type.
    this function determines the least common data type.
    to restructure each array, use the restructure_data() function.
    to concatenate arrays, call numpy.hstack on a tuple of arrays.

    @param scans: iterable of scan data or types.
        the elements of the list must be ETPI-like numpy structured arrays,
        numpy.dtype specifiers of a permitted ETPI-like array,
        or one of the DTYPE constants listed in DTYPES.

    @return: DTYPE constant which includes all the fields referred to in the input data.
    """
    fields = set([])
    for item in scans:
        if isinstance(item, np.ndarray):
            names = item.dtype.names
        elif isinstance(item, np.dtype):
            names = item.names
        else:
            names = [dt[0] for dt in item]
        for name in names:
            fields.add(name)

    dtype = [dt for dt in DTYPE_ETPAIS if dt[0] in fields]
    return dtype

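# usage sketch: concatenate an ETPI and an ETPAI scan as described above,
# assuming etpi and etpai are existing arrays of those types.
#
#   dtype = common_dtype((etpi, etpai))
#   combined = np.hstack((restructure_data(etpi, dtype=dtype),
#                         restructure_data(etpai, dtype=dtype)))
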
def detect_scan_mode(data):
    """
    detect the scan mode and unique scan positions in a data array.

    the function detects which columns of the data array are scanning.
    if the values of a column are not constant, the column is considered to be scanning.
    the function does not require a particular ordering of the scan positions
    (although other parts of the code may do so).
    the function returns the names of the scanning columns.

    the function also extracts unique positions for each column, and returns one array per column of input data.
    in the case of a fixed (non-scanning) column, the resulting array contains one data point.
    if the input data does not contain a particular column, the resulting array will contain 0 per default.

    if both theta and phi columns are non-constant, the function reports a theta-phi scan.
    in a theta-phi scan, each pair (theta, phi) is considered a scan position,
    and uniqueness is enforced with respect to the (theta, phi) pairs.
    the individual theta and phi arrays may contain duplicate values.

    @param data ETPI-like structured numpy.ndarray.
        only the 'e', 't', 'p', and 'a' columns are considered.

    @return the tuple (scan_mode, scan_positions), where
    @arg scan_mode is a list of column names that refer to the scanned variables,
        i.e. non-constant columns in the input data.
        possible values are 'e', 't', 'p', and 'a'.
    @arg scan_positions is a dictionary of scan dimensions.
        the dictionary contains one-dimensional numpy arrays, one for each dimension.
        the dictionary keys are 'e', 't', 'p', and 'a'.
        if a dimension is not scanned, the corresponding array contains just one element.
        if the input data does not contain a column at all,
        the corresponding output array is not included in the dictionary.

    note the special case of theta-phi scans.
    theta and phi are always returned as two separate arrays.
    """
    scan_mode = []

    try:
        scan_energy = np.unique(data['e'])
    except ValueError:
        scan_energy = np.array([])
    try:
        scan_theta = np.unique(data['t'])
    except ValueError:
        scan_theta = np.array([])
    try:
        scan_phi = np.unique(data['p'])
    except ValueError:
        scan_phi = np.array([])
    try:
        scan_alpha = np.unique(data['a'])
    except ValueError:
        scan_alpha = np.array([])

    # theta-phi scan
    if scan_theta.shape[0] >= 2 and scan_phi.shape[0] >= 2:
        try:
            scan_theta_phi = np.unique(data[['t', 'p']])
        except ValueError:
            scan_theta_phi = None
        if scan_theta_phi is not None and len(scan_theta_phi.dtype.names) == 2:
            scan_theta = scan_theta_phi['t']
            scan_phi = scan_theta_phi['p']

    scan_positions = {}
    if scan_energy.shape[0] >= 1:
        scan_positions['e'] = scan_energy
        if scan_energy.shape[0] >= 2:
            scan_mode.append('e')
    if scan_theta.shape[0] >= 1:
        scan_positions['t'] = scan_theta
        if scan_theta.shape[0] >= 2:
            scan_mode.append('t')
    if scan_phi.shape[0] >= 1:
        scan_positions['p'] = scan_phi
        if scan_phi.shape[0] >= 2:
            scan_mode.append('p')
    if scan_alpha.shape[0] >= 1:
        scan_positions['a'] = scan_alpha
        if scan_alpha.shape[0] >= 2:
            scan_mode.append('a')

    return scan_mode, scan_positions

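# usage sketch on a two-point energy scan at fixed angles:
#
#   data = create_data((2,), datatype='ETPI')
#   data['e'] = (100.0, 110.0)
#   data['t'] = 45.0
#   detect_scan_mode(data)
#   # -> (['e'], {'e': array([100., 110.]), 't': array([45.]), 'p': array([0.])})
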
def filter_tp(data, filter):
    """
    select data points from an ETPI array that match theta and phi coordinates of another ETPI array.

    theta and phi are compared after rounding to a 0.1 degree grid.

    @param data ETPI-like structured numpy.ndarray (ETPI, ETPIS, ETPAI, ETPAIS).

    @param filter ETPI-like structured numpy.ndarray (ETPI, ETPIS, ETPAI, ETPAIS).
        only 't' and 'p' columns are used.

    @return filtered data (numpy.ndarray)
        copy of selected data rows from input data.
        same data type as input data.
    """
    # copy theta, phi into separate structured arrays
    data_tp = np.zeros_like(data, dtype=[('t', '<i4'), ('p', '<i4')])
    filter_tp = np.zeros_like(filter, dtype=[('t', '<i4'), ('p', '<i4')])
    # multiply by 10, round to integer
    data_tp['t'] = np.around(data['t'] * 10.0)
    data_tp['p'] = np.around(data['p'] * 10.0)
    filter_tp['t'] = np.around(filter['t'] * 10.0)
    filter_tp['p'] = np.around(filter['p'] * 10.0)
    # calculate intersection
    idx = np.in1d(data_tp, filter_tp)
    result = data[idx]
    return result

def interpolate_hemi_scan(rect_tpi, hemi_tpi):
    """
    interpolate a hemispherical scan from a rectangular angle scan.

    the function interpolates in phi (azimuth) only.
    the rectangular array must contain a matching scan line for each theta (polar angle) of the hemi scan.
    this requires that the hemi scan have a linear theta axis.

    @param rect_tpi TPI structured numpy.ndarray.
        rectangular theta-phi scan.
        each azimuthal line has the same number of points and range.
        the azimuthal coordinate is monotonically increasing.
    @param hemi_tpi TPI structured numpy.ndarray.
        hemispherical theta-phi scan.
        each theta of the hemi scan must have a matching scan line in the rectangular scan.
        the array may contain additional columns (E, A, S) as long as each (theta, phi) pair is unique.
        the extra columns are not altered.
    @return hemi_tpi with the interpolation result in the I column.
    """
    lin_theta = np.unique(hemi_tpi['t'])
    for theta in lin_theta:
        sel_theta = np.abs(hemi_tpi['t'] - theta) < 0.1
        lin_phi = hemi_tpi['p'][sel_theta]

        sel_rect_theta = np.abs(rect_tpi['t'] - theta) < 0.1
        rect_phi_1d = rect_tpi['p'][sel_rect_theta]
        rect_int_1d = rect_tpi['i'][sel_rect_theta]

        result = np.interp(lin_phi, rect_phi_1d, rect_int_1d)
        hemi_tpi['i'][sel_theta] = result
    return hemi_tpi

def reshape_2d(flat_data, axis_columns, return_column='i'):
    """
    reshape an ETPI-like array into a two-dimensional array according to the scan axes.

    @param flat_data structured, one-dimensional numpy.ndarray with column labels.
        the array must contain a rectangular scan grid.
        the array must be sorted in the order of axis_columns.

    @param axis_columns list of column names that designate the axes

    @param return_column name of the column that is returned as a two-dimensional array

    @return the tuple (result_data, axis0, axis1), where
    @arg result_data (ndarray) new two-dimensional ndarray of the scan
    @arg axis0 (ndarray) scan positions along the first dimension
    @arg axis1 (ndarray) scan positions along the second dimension
    """

    axis0 = np.unique(flat_data[axis_columns[0]])
    n0 = len(axis0)
    axis1 = np.unique(flat_data[axis_columns[1]])
    n1 = len(axis1)
    data = np.reshape(flat_data[return_column], (n0, n1), order='C')
    return data.copy(), axis0, axis1

def calc_modfunc_mean(data):
    """
    calculates the modulation function using the mean value of data.
    this is a simplified calculation method
    which can be used if the I0 of the data does not have a strong variation.

    @param data: ETPI array containing experimental or calculated intensity.

    @return ETPI array containing the modulation function.
    """

    scan_mode, scan_positions = detect_scan_mode(data)
    modf = data.copy()

    if len(scan_mode) == 1:
        norm = np.mean(data['i'], dtype=np.float64)
        modf['i'] = (data['i'] - norm) / norm
    elif len(scan_mode) == 2:
        axis0 = scan_positions[scan_mode[0]]
        n0 = len(axis0)
        axis1 = scan_positions[scan_mode[1]]
        n1 = len(axis1)
        nd_data = np.reshape(data['i'], (n0, n1), order='C')

        prof0 = np.mean(nd_data, axis=1, dtype=np.float64)
        norm0 = np.mean(prof0, dtype=np.float64)
        nd_modf = (nd_data - norm0) / norm0

        modf['i'] = np.ravel(nd_modf, order='C')
    else:
        logger.error('unsupported scan in calc_modfunc_mean: {0}'.format(scan_mode))

    return modf

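# usage sketch: for a one-dimensional scan the result is (I - <I>) / <I>.
#
#   data = create_data((3,), datatype='ETPI')
#   data['e'] = (1.0, 2.0, 3.0)
#   data['i'] = (9.0, 10.0, 11.0)
#   calc_modfunc_mean(data)['i']
#   # -> approximately [-0.1, 0.0, 0.1]
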
def calc_modfunc_loess(data):
|
||||
"""
|
||||
calculate the modulation function using LOESS (locally weighted regression) smoothing.
|
||||
|
||||
the modulation function of I(x) is (I(x) - S(x)) / S(x)
|
||||
where the array S(x) is a LOESS-smoothed copy of I(x).
|
||||
|
||||
this function uses true multi-dimensional LOESS smoothing,
|
||||
in the same way as Igor's Loess operation.
|
||||
|
||||
this function uses the LOESS algorithm implemented by
|
||||
William S. Cleveland, Eric Grosse, Ming-Jen Shyu, dated 18 August 1992.
|
||||
the code and the python interface are included in the loess package.
|
||||
|
||||
@param data structured numpy.ndarray in EI, ETPI, or ETPAI format.
|
||||
can contain a one- or multi-dimensional scan.
|
||||
the algorithm does not require any specific scan mode or order
|
||||
(no rectangular grid, no particular scan hierarchy, no sorting).
|
||||
|
||||
if data contains a hemispherical scan, the phi dimension is ignored,
|
||||
i.e. the function effectively applies a phi-average.
|
||||
|
||||
the modulation function is calculated for the finite-valued scan points.
|
||||
NaNs are ignored and do not affect the finite values.
|
||||
|
||||
@return copy of the data array with the modulation function in the 'i' column.
|
||||
|
||||
@todo is a fixed smoothing factor of 0.5 okay?
|
||||
"""
|
||||
sel = np.isfinite(data['i'])
|
||||
_data = data[sel]
|
||||
|
||||
modf = data.copy()
|
||||
if _data.shape[0]:
|
||||
scan_mode, __ = detect_scan_mode(_data)
|
||||
if 't' in scan_mode and 'p' in scan_mode:
|
||||
scan_mode.remove('p')
|
||||
|
||||
lo = loess.loess_struct(_data.shape[0], len(scan_mode))
|
||||
factors = [_data[axis] for axis in scan_mode]
|
||||
lo.set_x(np.hstack(tuple(factors)))
|
||||
lo.set_y(_data['i'])
|
||||
lo.model.span = 0.5
|
||||
loess.loess(lo)
|
||||
|
||||
modf['i'][sel] = lo.get_fitted_residuals() / lo.get_fitted_values()
|
||||
else:
|
||||
modf['i'] = np.nan
|
||||
|
||||
return modf
|
||||
|
||||
|
||||
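# illustrative sketch (not part of the original module): the (I - S) / S
# definition used by calc_modfunc_loess above, with a crude moving average
# standing in for the LOESS fit S(x). this only demonstrates the formula.
import numpy as np

i = np.array([1.0, 1.3, 0.9, 1.2, 1.0])
s = np.convolve(i, np.ones(3) / 3.0, mode='same')   # stand-in for LOESS
chi = (i - s) / s              # note: the toy smoother biases the end points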
def rfactor(experiment, theory):
|
||||
"""
|
||||
calculate the R-factor of a calculated modulation function.
|
||||
|
||||
if the sigma column is present in experiment and non-zero,
|
||||
the R-factor terms are weighted by 1/sigma**2.
|
||||
|
||||
the input arrays must have the same shape, and the coordinate columns are assumed to be identical (their values are not checked).
|
||||
the array elements are compared element-by-element.
|
||||
terms having NaN intensity are ignored.
|
||||
|
||||
@param experiment: ETPI, ETPIS, ETPAI or ETPAIS array containing the experimental modulation function.
|
||||
|
||||
@param theory: ETPI or ETPAI array containing the calculated modulation functions.
|
||||
|
||||
@return scalar R-factor in the range from 0.0 to 2.0.
|
||||
|
||||
@raise ValueError if the function fails (e.g. division by zero or all elements non-finite).
|
||||
"""
|
||||
sel = np.logical_and(np.isfinite(theory['i']), np.isfinite(experiment['i']))
|
||||
theory = theory[sel]
|
||||
experiment = experiment[sel]
|
||||
if ('s' in experiment.dtype.names) and (experiment['s'].min() > 0.0):
|
||||
wgts = 1.0 / experiment['s'] ** 2
|
||||
else:
|
||||
wgts = 1.0
|
||||
difs = wgts * (experiment['i'] - theory['i']) ** 2
|
||||
sums = wgts * (experiment['i'] ** 2 + theory['i'] ** 2)
|
||||
sum1 = difs.sum(dtype=np.float64)
|
||||
sum2 = sums.sum(dtype=np.float64)
|
||||
return sum1 / sum2
|
||||
|
||||
|
||||
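# illustrative sketch (not part of the original module): the weighted
# R-factor formula of rfactor() evaluated on plain arrays with hypothetical
# values.
import numpy as np

exp_modf = np.array([0.10, -0.20, 0.05])
calc_modf = np.array([0.12, -0.15, 0.00])
w = np.ones_like(exp_modf)                       # or 1/sigma**2 if known
difs = (w * (exp_modf - calc_modf) ** 2).sum()
sums = (w * (exp_modf ** 2 + calc_modf ** 2)).sum()
r = difs / sums         # 0 = perfect agreement, 2 = complete anti-correlation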
def scaled_rfactor(scale, experiment, weights, theory):
|
||||
"""
|
||||
calculate the R-factor of a modulation function against the measurement with scaled amplitude.
|
||||
|
||||
this function applies a scaling factor to the experimental function and returns the resulting R-factor.
|
||||
this is useful if the amplitudes of the two functions do not match due to systematic effects
|
||||
of the calculation or the measurement.
|
||||
|
||||
this function is used by optimize_rfactor() as a scipy.optimize.least_squares optimization function,
|
||||
which requires a specific signature.
|
||||
|
||||
NaNs will propagate to the final result.
|
||||
math exceptions are not handled.
|
||||
|
||||
@param scale: scaling factor (> 0).
|
||||
the experimental modulation function is multiplied by this parameter.
|
||||
< 1 (> 1) decreases (increases) the experimental amplitude.
|
||||
the R factor is calculated using the scaled modulation function.
|
||||
|
||||
@param experiment: numpy.ndarray containing the experimental modulation function
|
||||
|
||||
@param weights: numpy.ndarray containing the experimental weights
|
||||
|
||||
@param theory: numpy.ndarray containing the theoretical modulation function
|
||||
|
||||
@return: scalar R-factor in the range from 0.0 to 2.0.
|
||||
nan if any element of the function arguments is nan.
|
||||
|
||||
@raise ValueError if all experiment and theory values, or all weights, are zero.
|
||||
"""
|
||||
difs = weights * (scale * experiment - theory) ** 2
|
||||
sums = weights * (scale ** 2 * experiment ** 2 + theory ** 2)
|
||||
sum1 = difs.sum(dtype=np.float64)
|
||||
sum2 = sums.sum(dtype=np.float64)
|
||||
return sum1 / sum2
|
||||
|
||||
|
||||
def optimize_rfactor(experiment, theory):
|
||||
"""
|
||||
calculate the R-factor of a calculated modulation function against the measurement, adjusting their amplitude.
|
||||
|
||||
if the sigma column is present in experiment and non-zero,
|
||||
the R-factor terms are weighted by 1/sigma**2.
|
||||
|
||||
this function varies the scale of the experimental function and returns the minimum R-factor.
|
||||
this is useful if the amplitudes of the two functions do not match due to systematic effects
|
||||
of the calculation or the measurement.
|
||||
|
||||
the optimization is done in a scipy.optimize.least_squares optimization of the scaled_rfactor() function.
|
||||
the initial guess of the scaling factor is 0.7, the constraining boundaries are 1/10 and 10.
|
||||
|
||||
the input arrays must have the same shape, and the coordinate columns are assumed to be identical (their values are not checked).
|
||||
the array elements are compared element-by-element.
|
||||
terms having NaN intensity are ignored.
|
||||
|
||||
@param experiment: ETPI, ETPIS, ETPAI or ETPAIS array containing the experimental modulation function.
|
||||
|
||||
@param theory: ETPI or ETPAI array containing the calculated modulation functions.
|
||||
|
||||
@return scalar R-factor in the range from 0.0 to 2.0.
|
||||
|
||||
@raise ValueError if the optimization fails (e.g. division by zero or all elements non-finite).
|
||||
"""
|
||||
sel = np.logical_and(np.isfinite(theory['i']), np.isfinite(experiment['i']))
|
||||
theory = theory[sel]
|
||||
experiment = experiment[sel]
|
||||
if ('s' in experiment.dtype.names) and (experiment['s'].min() > 0.0):
|
||||
wgts = 1.0 / experiment['s'] ** 2
|
||||
else:
|
||||
wgts = np.ones_like(experiment['i'])
|
||||
|
||||
result = so.least_squares(scaled_rfactor, 0.7, bounds=(0.1, 10.0), args=(experiment['i'], wgts, theory['i']))
|
||||
result_r = scaled_rfactor(result.x, experiment['i'], wgts, theory['i'])
|
||||
|
||||
return result_r
|
||||
|
||||
|
||||
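# illustrative sketch (not part of the original module): how scaled_rfactor
# plugs into scipy.optimize.least_squares, as optimize_rfactor does above.
# it relies on scaled_rfactor defined in this module; the arrays are
# hypothetical.
import numpy as np
import scipy.optimize as so

exp_modf = np.array([0.2, -0.4, 0.1])
calc_modf = np.array([0.1, -0.2, 0.05])
w = np.ones_like(exp_modf)
fit = so.least_squares(scaled_rfactor, 0.7, bounds=(0.1, 10.0),
                       args=(exp_modf, w, calc_modf))
best_r = scaled_rfactor(fit.x, exp_modf, w, calc_modf)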
def alpha_average(data):
|
||||
"""
|
||||
average I(alpha, theta, phi) over alpha.
|
||||
|
||||
@param data structured numpy.ndarray in ETPAI or ETPAIS format with a non-singular alpha dimension.
|
||||
|
||||
@return resulting ETPI or ETPIS data array.
|
||||
"""
|
||||
scan_mode, scan_positions = detect_scan_mode(data)
|
||||
result = data.copy()
|
||||
|
||||
if len(scan_mode) == 2 and scan_mode[1] == 'a':
|
||||
axis0 = scan_positions[scan_mode[0]]
|
||||
n0 = len(axis0)
|
||||
axis1 = scan_positions[scan_mode[1]]
|
||||
n1 = len(axis1)
|
||||
nd_data = np.reshape(data, (n0, n1), order='C')
|
||||
|
||||
nd_result = nd_data[:, 0].copy()  # copy: nd_data is a view of the input array
|
||||
names = list(nd_data.dtype.names)
|
||||
names.remove('a')
|
||||
for name in names:
|
||||
nd_result[name] = np.mean(nd_data[name], axis=1, dtype=np.float64)
|
||||
result = nd_result[names]
|
||||
else:
|
||||
logger.error('unsupported scan in alpha_average: {0}'.format(scan_mode))
|
||||
|
||||
return result
|
||||
|
||||
|
||||
def phi_average(data):
|
||||
"""
|
||||
average I(theta, phi) over phi.
|
||||
|
||||
@param data TPI-like structured numpy.ndarray containing a hemispherical scan.
|
||||
|
||||
@return resulting TI or TIS data array.
|
||||
"""
|
||||
scan_mode, scan_positions = detect_scan_mode(data)
|
||||
result = data.copy()
|
||||
|
||||
if scan_mode == ['t', 'p']:
|
||||
t_axis = np.unique(scan_positions['t'])
|
||||
nt = len(t_axis)
|
||||
|
||||
names = list(data.dtype.names)
|
||||
names.remove('p')
|
||||
dtype = [(name, data.dtype[name].str) for name in names]
|
||||
result = create_data((nt,), dtype=dtype)
|
||||
|
||||
for i, t in enumerate(t_axis):
|
||||
sel = np.abs(scan_positions['t'] - t) < 0.01
|
||||
for name in names:
|
||||
result[name][i] = np.mean(data[name][sel], dtype=np.float64)
|
||||
else:
|
||||
logger.error('unsupported scan in phi_average: {0}'.format(scan_mode))
|
||||
|
||||
return result
|
||||
|
||||
|
||||
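# illustrative sketch (not part of the original module): averaging the phi
# points of one theta ring, as phi_average does above with its 0.01 degree
# tolerance on theta. the values are hypothetical.
import numpy as np

t = np.array([0.0, 0.0, 10.0, 10.0])
i = np.array([1.0, 3.0, 2.0, 4.0])
sel = np.abs(t - 10.0) < 0.01                    # select the ring at theta = 10
ring_mean = np.mean(i[sel], dtype=np.float64)    # -> 3.0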
def alpha_mirror_average(data):
|
||||
"""
|
||||
calculate the average of I(alpha, theta, phi) and I(-alpha, theta, phi).
|
||||
|
||||
@param data structured numpy.ndarray in ETPAI or ETPAIS format.
|
||||
for each (alpha, theta, phi) the array must contain a corresponding (-alpha, theta, phi)
|
||||
within a tolerance of 0.5 degrees in alpha. otherwise, a warning is issued.
|
||||
|
||||
@return resulting data array, same shape as input.
|
||||
the array is sorted.
|
||||
"""
|
||||
|
||||
result1 = data.copy()
|
||||
sort_data(result1)
|
||||
|
||||
result2 = data.copy()
|
||||
try:
|
||||
result2['a'] = -result2['a']
|
||||
sort_data(result2)
|
||||
except ValueError:
|
||||
pass
|
||||
|
||||
if np.allclose(result1['a'], result2['a'], atol=0.5):
|
||||
result1['i'] = (result1['i'] + result2['i']) / 2.0
|
||||
try:
|
||||
result1['s'] = np.sqrt(result1['s'] ** 2 + result2['s'] ** 2) / 2.0
|
||||
except ValueError:
|
||||
pass
|
||||
else:
|
||||
logger.warning('asymmetric alpha scan. skipping alpha mirror average.')
|
||||
|
||||
return result1
|
||||
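# illustrative sketch (not part of the original module): the equal-weight
# average and the quadrature combination of uncertainties used by
# alpha_mirror_average above, for a single pair of hypothetical points.
import numpy as np

i_pos, s_pos = 1.0, 0.1          # I(+alpha) and its sigma
i_neg, s_neg = 1.2, 0.1          # I(-alpha) and its sigma
i_avg = (i_pos + i_neg) / 2.0
s_avg = np.sqrt(s_pos ** 2 + s_neg ** 2) / 2.0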
972
pmsco/dispatch.py
Normal file
@@ -0,0 +1,972 @@
|
||||
"""
|
||||
@package pmsco.dispatch
|
||||
calculation dispatcher.
|
||||
|
||||
@author Matthias Muntwiler
|
||||
|
||||
@copyright (c) 2015 by Paul Scherrer Institut @n
|
||||
Licensed under the Apache License, Version 2.0 (the "License"); @n
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
"""
|
||||
|
||||
from __future__ import division
|
||||
import os
|
||||
import os.path
|
||||
import datetime
|
||||
import signal
|
||||
import collections
|
||||
import copy
|
||||
import logging
|
||||
from mpi4py import MPI
|
||||
from helpers import BraceMessage as BMsg
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# messages sent from master to slaves
|
||||
|
||||
## master sends new assignment
|
||||
## the message is a dictionary of model parameters
|
||||
TAG_NEW_TASK = 1
|
||||
## master calls end of calculation
|
||||
## the message is empty
|
||||
TAG_FINISH = 2
|
||||
|
||||
# messages sent from slaves to master
|
||||
|
||||
## slave reports new result
|
||||
## the message is a dictionary of model parameters and results
|
||||
TAG_NEW_RESULT = 1
|
||||
## slave confirms end of calculation
|
||||
## currently not used
|
||||
TAG_FINISHED = 2
|
||||
## slave has encountered an error, result is invalid
|
||||
## the message contains the original task message
|
||||
TAG_INVALID_RESULT = 3
|
||||
## slave has encountered an error and is aborting
|
||||
## the message is empty
|
||||
TAG_ERROR_ABORTING = 4
|
||||
|
||||
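# typical exchange (illustrative): the master sends TAG_NEW_TASK with the
# dictionary from CalculationTask.get_mpi_message(); the slave replies with
# TAG_NEW_RESULT, or TAG_INVALID_RESULT on a recoverable error; finally the
# master sends TAG_FINISH and the slave leaves its receive loop.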
CalcID = collections.namedtuple('CalcID', ['model', 'scan', 'sym', 'emit', 'region'])
|
||||
|
||||
|
||||
class CalculationTask(object):
|
||||
"""
|
||||
identifies a calculation task by index and model parameters.
|
||||
|
||||
given an object of this class, the project must be able to:
|
||||
* produce calculation parameters,
|
||||
* produce a cluster,
|
||||
* gather results.
|
||||
|
||||
a calculation task is identified by:
|
||||
|
||||
@arg @c id.model structure number or iteration (handled by the mode module)
|
||||
@arg @c id.scan scan number (handled by the project)
|
||||
@arg @c id.sym symmetry number (handled by the project)
|
||||
@arg @c id.emit emitter number (handled by the project)
|
||||
@arg @c id.region region number (handled by the region handler)
|
||||
|
||||
specified members must be greater or equal to zero.
|
||||
-1 is the wildcard which is used in parent tasks,
|
||||
where, e.g., no specific symmetry is chosen.
|
||||
the root task has the ID (-1, -1, -1, -1, -1).
|
||||
"""
|
||||
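# example (hypothetical): CalcID(model=1, scan=0, sym=-1, emit=-1, region=-1)
# identifies the parent of all symmetry, emitter and region tasks that belong
# to model 1, scan 0.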
|
||||
## @var id (CalcID)
|
||||
# named tuple CalcID containing the 5-part calculation task identifier.
|
||||
|
||||
## @var parent_id (CalcID)
|
||||
# named tuple CalcID containing the task identifier of the parent task.
|
||||
|
||||
## @var model (dict)
|
||||
# dictionary containing the model parameters of the task.
|
||||
#
|
||||
# this is typically initialized to the parameters of the parent task,
|
||||
# and varied at the level where the task ID was produced.
|
||||
|
||||
## @var file_root (string)
|
||||
# file name without extension and index.
|
||||
|
||||
## @var file_ext (string)
|
||||
# file name extension including dot.
|
||||
#
|
||||
# the extension is set by the scattering code interface.
|
||||
# it must be passed back up the hierarchy.
|
||||
|
||||
## @var result_filename (string)
|
||||
# name of the ETPI or ETPAI file that contains the result (intensity) data.
|
||||
#
|
||||
# this member is filled at the end of the calculation by MscoProcess.calc().
|
||||
# the filename can be constructed given the base name, task ID, and extension.
|
||||
# since this may be tedious, the filename must be returned here.
|
||||
|
||||
## @var modf_filename (string)
|
||||
# name of the ETPI or ETPAI file that contains the resulting modulation function.
|
||||
|
||||
## @var time (timedelta)
|
||||
# execution time of the task.
|
||||
#
|
||||
# execution time is measured as wall time of a single calculation.
|
||||
# in parent tasks, execution time is the sum of the children's execution time.
|
||||
#
|
||||
# this information may be used to plan the end of the program run or for statistics.
|
||||
|
||||
## @var files (dict)
|
||||
# files generated by the task and their category
|
||||
#
|
||||
# dictionary key is the file name,
|
||||
# value is the file category, e.g. 'cluster', 'phase', etc.
|
||||
#
|
||||
# this information is used to automatically clean up unnecessary data files.
|
||||
|
||||
## @var region (dict)
|
||||
# scan positions to substitute the ones from the original scan.
|
||||
#
|
||||
# this is used to distribute scans over multiple calculator processes,
|
||||
# cf. e.g. @ref EnergyRegionHandler.
|
||||
#
|
||||
# dictionary key must be the scan dimension 'e', 't', 'p', 'a'.
|
||||
# the value is a numpy.ndarray containing the scan positions.
|
||||
#
|
||||
# the dictionary can be empty if the original scan shall be calculated at once.
|
||||
|
||||
def __init__(self):
|
||||
"""
|
||||
create a new calculation task instance with all members equal to zero (root task).
|
||||
"""
|
||||
self.id = CalcID(-1, -1, -1, -1, -1)
|
||||
self.parent_id = self.id
|
||||
self.model = {}
|
||||
self.file_root = ""
|
||||
self.file_ext = ""
|
||||
self.result_filename = ""
|
||||
self.modf_filename = ""
|
||||
self.result_valid = False
|
||||
self.time = datetime.timedelta()
|
||||
self.files = {}
|
||||
self.region = {}
|
||||
|
||||
def __eq__(self, other):
|
||||
"""
|
||||
consider two tasks equal if they have the same ID.
|
||||
|
||||
EXPERIMENTAL
|
||||
not clear whether this is a good idea.
|
||||
we want this equality because the calculation may modify a task to return results.
|
||||
yet, it should be considered the same task.
|
||||
e.g., we want to find the task in the original task list.
|
||||
"""
|
||||
return isinstance(other, self.__class__) and self.id == other.id
|
||||
|
||||
def __hash__(self):
|
||||
"""
|
||||
the hash depends on the ID only.
|
||||
"""
|
||||
return hash(self.id)
|
||||
|
||||
def get_mpi_message(self):
|
||||
"""
|
||||
convert the task data to a format suitable for an MPI message.
|
||||
|
||||
mpi4py does not properly pickle objects.
|
||||
we need to convert our data to basic types.
|
||||
|
||||
@return: (dict)
|
||||
"""
|
||||
msg = dict(vars(self))  # copy the attribute dict so the task object itself is not modified
|
||||
msg['id'] = self.id._asdict()
|
||||
msg['parent_id'] = self.parent_id._asdict()
|
||||
return msg
|
||||
|
||||
def set_mpi_message(self, msg):
|
||||
"""
|
||||
set object attributes from MPI message.
|
||||
|
||||
@param msg: message created by get_mpi_message()
|
||||
|
||||
@return: None
|
||||
"""
|
||||
if isinstance(msg['id'], dict):
|
||||
msg['id'] = CalcID(**msg['id'])
|
||||
if isinstance(msg['parent_id'], dict):
|
||||
msg['parent_id'] = CalcID(**msg['parent_id'])
|
||||
for k, v in msg.iteritems():
|
||||
self.__setattr__(k, v)
|
||||
|
||||
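# example (hypothetical round trip): on the master,
#   msg = task.get_mpi_message()    # a plain dict that mpi4py can transmit
# and on the receiving slave,
#   task = CalculationTask()
#   task.set_mpi_message(msg)
# restores an equivalent task object.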
def format_filename(self, **overrides):
|
||||
"""
|
||||
format input or output file name including calculation index.
|
||||
|
||||
@param overrides optional keyword arguments override object fields.
|
||||
the following keywords are handled: @c root, @c model, @c scan, @c sym, @c emit, @c region, @c ext.
|
||||
|
||||
@return a string consisting of the concatenation of the base name, the ID, and the extension.
|
||||
"""
|
||||
parts = {}
|
||||
parts['root'] = self.file_root
|
||||
parts['model'] = self.id.model
|
||||
parts['scan'] = self.id.scan
|
||||
parts['sym'] = self.id.sym
|
||||
parts['emit'] = self.id.emit
|
||||
parts['region'] = self.id.region
|
||||
parts['ext'] = self.file_ext
|
||||
|
||||
for key in overrides.keys():
|
||||
parts[key] = overrides[key]
|
||||
|
||||
filename = "{root}_{model}_{scan}_{sym}_{emit}_{region}{ext}".format(**parts)
|
||||
return filename
|
||||
|
||||
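# example (hypothetical): with file_root "out", file_ext ".etpi" and
# id = CalcID(model=3, scan=0, sym=1, emit=0, region=2),
# format_filename() returns "out_3_0_1_0_2.etpi",
# and format_filename(region=0) overrides just the region part.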
def copy(self):
|
||||
"""
|
||||
create a copy of the task.
|
||||
|
||||
@return: new independent CalculationTask with the same attributes as the original one.
|
||||
"""
|
||||
return copy.deepcopy(self)
|
||||
|
||||
def change_id(self, **kwargs):
|
||||
"""
|
||||
change the ID of the task.
|
||||
|
||||
@param kwargs: keyword arguments to change specific parts of the ID.
|
||||
|
||||
@note instead of changing all parts of the ID, you may simply assign a new CalcID to the id member.
|
||||
"""
|
||||
self.id = self.id._replace(**kwargs)
|
||||
|
||||
def add_task_file(self, name, category):
|
||||
"""
|
||||
register a file that was generated by the calculation task.
|
||||
|
||||
this information is used to automatically clean up unnecessary data files.
|
||||
|
||||
@param name: file name (optionally including a path).
|
||||
@param category: file category, e.g. 'cluster', 'phase', etc.
|
||||
@return: None
|
||||
"""
|
||||
self.files[name] = category
|
||||
|
||||
def rename_task_file(self, old_filename, new_filename):
|
||||
"""
|
||||
rename a file.
|
||||
|
||||
update the file list after a file was renamed.
|
||||
the method silently ignores if old_filename is not listed.
|
||||
|
||||
@param old_filename: old file name
|
||||
@param new_filename: new file name
|
||||
@return: None
|
||||
"""
|
||||
try:
|
||||
self.files[new_filename] = self.files[old_filename]
|
||||
del self.files[old_filename]
|
||||
except KeyError:
|
||||
logger.warning("CalculationTask.rename_task_file: could not rename file {0} to {1}".format(old_filename,
|
||||
new_filename))
|
||||
|
||||
def remove_task_file(self, filename):
|
||||
"""
|
||||
remove a file from the list of generated data files.
|
||||
|
||||
the method silently ignores if filename is not listed.
|
||||
the method removes the file from the internal list.
|
||||
it does not delete the file.
|
||||
|
||||
@param filename: file name
|
||||
@return: None
|
||||
"""
|
||||
try:
|
||||
del self.files[filename]
|
||||
except KeyError:
|
||||
logger.warning("CalculationTask.remove_task_file: could not remove file {0}".format(filename))
|
||||
|
||||
|
||||
class MscoProcess(object):
|
||||
"""
|
||||
code shared by MscoMaster and MscoSlave.
|
||||
|
||||
mainly passing project parameters, handling OS signals,
|
||||
calling an MSC calculation.
|
||||
"""
|
||||
|
||||
## @var _finishing
|
||||
# if True, the task loop should not accept new tasks.
|
||||
#
|
||||
# the loop still waits for the results of running calculations.
|
||||
|
||||
## @var _running
|
||||
# while True, the task loop keeps running.
|
||||
#
|
||||
# if False, the loop will exit just before the next iteration.
|
||||
# pending tasks and running calculations will not be waited for.
|
||||
#
|
||||
# @attention make sure that all calculations are finished before resetting this flag.
|
||||
# higher ranked processes may not exit if they do not receive the finish message.
|
||||
|
||||
## @var datetime_limit (datetime.datetime)
|
||||
# date and time when the calculations should finish (regardless of result)
|
||||
# because the process may get killed by the scheduler after this time.
|
||||
#
|
||||
# the default is 2 days after start.
|
||||
|
||||
def __init__(self, comm):
|
||||
self._comm = comm
|
||||
self._project = None
|
||||
self._calculator = None
|
||||
self._running = False
|
||||
self._finishing = False
|
||||
self.stop_signal = False
|
||||
self.datetime_limit = datetime.datetime.now() + datetime.timedelta(days=2)
|
||||
|
||||
def setup(self, project):
|
||||
self._project = project
|
||||
self._calculator = project.calculator_class()
|
||||
self._running = False
|
||||
self._finishing = False
|
||||
self.stop_signal = False
|
||||
|
||||
try:
|
||||
# signal handlers
|
||||
signal.signal(signal.SIGTERM, self.receive_signal)
|
||||
signal.signal(signal.SIGUSR1, self.receive_signal)
|
||||
signal.signal(signal.SIGUSR2, self.receive_signal)
|
||||
except AttributeError:
|
||||
pass
|
||||
except ValueError:
|
||||
pass
|
||||
|
||||
if project.timedelta_limit:
|
||||
self.datetime_limit = datetime.datetime.now() + project.timedelta_limit
|
||||
|
||||
# noinspection PyUnusedLocal
|
||||
def receive_signal(self, signum, stack):
|
||||
"""
|
||||
sets the self.stop_signal flag,
|
||||
which will terminate the optimization process
|
||||
as soon as all slaves have finished their calculation.
|
||||
"""
|
||||
self.stop_signal = True
|
||||
|
||||
def run(self):
|
||||
pass
|
||||
|
||||
def cleanup(self):
|
||||
"""
|
||||
clean up after all calculations.
|
||||
|
||||
this method calls the clean up function of the project.
|
||||
|
||||
@return: None
|
||||
"""
|
||||
self._project.cleanup()
|
||||
|
||||
def calc(self, task):
|
||||
"""
|
||||
execute a single calculation.
|
||||
|
||||
* create the cluster and parameter objects.
|
||||
* export the cluster for reference.
|
||||
* choose the scan file.
|
||||
* specify the output file name.
|
||||
* call the calculation program.
|
||||
* set task.result_filename, task.file_ext, task.time.
|
||||
|
||||
the function checks for some obvious errors, and skips the calculation if an error is detected, such as:
|
||||
|
||||
* missing atoms or emitters in the cluster.
|
||||
|
||||
@param task (CalculationTask) calculation task and identifier.
|
||||
"""
|
||||
|
||||
s_model = str(task.model)
|
||||
s_id = str(task.id)
|
||||
logger.info("calling calculation %s", s_id)
|
||||
logger.info("model %s", s_model)
|
||||
start_time = datetime.datetime.now()
|
||||
|
||||
# create parameter and cluster structures
|
||||
clu = self._project.cluster_generator.create_cluster(task.model, task.id)
|
||||
par = self._project.create_params(task.model, task.id)
|
||||
|
||||
# generate file names
|
||||
output_file = task.format_filename(ext="")
|
||||
|
||||
# determine scan range
|
||||
scan = self._project.scans[task.id.scan]
|
||||
if task.region:
|
||||
scan = scan.copy()
|
||||
try:
|
||||
scan.energies = task.region['e']
|
||||
logger.debug(BMsg("substitute energy region"))
|
||||
except KeyError:
|
||||
pass
|
||||
try:
|
||||
scan.thetas = task.region['t']
|
||||
logger.debug(BMsg("substitute theta region"))
|
||||
except KeyError:
|
||||
pass
|
||||
try:
|
||||
scan.phis = task.region['p']
|
||||
logger.debug(BMsg("substitute phi region"))
|
||||
except KeyError:
|
||||
pass
|
||||
try:
|
||||
scan.alphas = task.region['a']
|
||||
logger.debug(BMsg("substitute alpha region"))
|
||||
except KeyError:
|
||||
pass
|
||||
|
||||
# check parameters and call the msc program
|
||||
if clu.get_atom_count() < 2:
|
||||
logger.error("empty cluster in calculation %s", s_id)
|
||||
task.result_valid = False
|
||||
elif clu.get_emitter_count() < 1:
|
||||
logger.error("no emitters in cluster of calculation %s.", s_id)
|
||||
task.result_valid = False
|
||||
else:
|
||||
files = self._calculator.check_cluster(clu, output_file)
|
||||
task.files.update(files)
|
||||
|
||||
task.result_filename, files = self._calculator.run(par, clu, scan, output_file)
|
||||
(root, ext) = os.path.splitext(task.result_filename)
|
||||
task.file_ext = ext
|
||||
task.result_valid = True
|
||||
task.files.update(files)
|
||||
|
||||
task.time = datetime.datetime.now() - start_time
|
||||
|
||||
return task
|
||||
|
||||
|
||||
class MscoMaster(MscoProcess):
|
||||
"""
|
||||
MscoMaster process for MSC calculations.
|
||||
|
||||
This class implements the main loop of the master (rank 0) process.
|
||||
It sends calculation commands to the slaves, and dispatches the results
|
||||
to the appropriate post-processing modules.
|
||||
|
||||
if there is only one process, the MscoMaster executes the calculations sequentially.
|
||||
"""
|
||||
|
||||
## @var _pending_tasks (OrderedDict)
|
||||
# CalculationTask objects of pending calculations.
|
||||
# the dictionary keys are the task IDs.
|
||||
|
||||
## @var _running_tasks
|
||||
# CalculationTask objects of currently running calculations.
|
||||
# the dictionary keys are the task IDs.
|
||||
|
||||
## @var _complete_tasks
|
||||
# CalculationTask objects of complete calculations.
|
||||
#
|
||||
# calculations are removed from the list when they are passed to the result handlers.
|
||||
# the dictionary keys are the task IDs.
|
||||
|
||||
## @var _slaves
|
||||
# total number of MPI slave ranks = number of calculator slots
|
||||
|
||||
## @var _idle_ranks
|
||||
# list of ranks which are waiting to receive a task.
|
||||
#
|
||||
# list of int, default = []
|
||||
|
||||
## @var max_calculations
|
||||
# maximum number of calculations
|
||||
#
|
||||
# if this limit is exceeded, the optimization will stop.
|
||||
# the limit is meant to catch irregular situations such as run-time calculation errors or infinite loops.
|
||||
|
||||
## @var _calculations
|
||||
# number of dispatched calculations
|
||||
#
|
||||
# if this number exceeds the @ref max_calculations, the optimization will stop.
|
||||
|
||||
## @var _running_slaves
|
||||
# number of running slave ranks
|
||||
#
|
||||
# keeps track of active (idle or busy) slave ranks.
|
||||
# it is used to make sure (if possible) that all slave tasks have finished before the master quits.
|
||||
# the number is decremented when a slave quits due to an error or when the master sends a finish message.
|
||||
|
||||
## @var _min_queue_len
|
||||
# if the queue length drops below this number, the dispatcher asks for the next round of tasks.
|
||||
|
||||
## @var _model_done
|
||||
# (bool) True if the model handler has returned an empty list of new tasks.
|
||||
|
||||
## @var _root_task
|
||||
# (CalculationTask) root calculation task
|
||||
#
|
||||
# this is the root of the calculation tasks tree.
|
||||
# it defines the initial model and the output file name.
|
||||
# it is passed to the model handler during the main loop.
|
||||
|
||||
# @var _model_handler
|
||||
# (ModelHandler) model handler instance
|
||||
|
||||
# @var _scan_handler
|
||||
# (ScanHandler) scan handler instance
|
||||
|
||||
# @var _symmetry_handler
|
||||
# (SymmetryHandler) symmetry handler instance
|
||||
|
||||
# @var _emitter_handler
|
||||
# (EmitterHandler) emitter handler instance
|
||||
|
||||
# @var _region_handler
|
||||
# (RegionHandler) region handler instance
|
||||
|
||||
def __init__(self, comm):
|
||||
super(MscoMaster, self).__init__(comm)
|
||||
self._pending_tasks = collections.OrderedDict()
|
||||
self._running_tasks = collections.OrderedDict()
|
||||
self._complete_tasks = collections.OrderedDict()
|
||||
self._slaves = self._comm.Get_size() - 1
|
||||
self._idle_ranks = []
|
||||
self.max_calculations = 1000000
|
||||
self._calculations = 0
|
||||
self._running_slaves = 0
|
||||
self._model_done = False
|
||||
self._min_queue_len = self._slaves + 1
|
||||
|
||||
self._root_task = None
|
||||
self._model_handler = None
|
||||
self._scan_handler = None
|
||||
self._symmetry_handler = None
|
||||
self._emitter_handler = None
|
||||
self._region_handler = None
|
||||
|
||||
def setup(self, project):
|
||||
"""
|
||||
initialize the process, handlers, root task, slave counting.
|
||||
|
||||
this method initializes the run-time attributes of the master process,
|
||||
particularly the attributes that depend on the project.
|
||||
|
||||
it creates the root calculation task with the initial model defined by the project.
|
||||
|
||||
it creates and initializes the task handler objects according to the handler classes defined by the project.
|
||||
|
||||
the method notifies the handlers of the number of available slave processes (slots).
|
||||
some of the tasks handlers adjust their branching according to the number of slots.
|
||||
this mechanism may be used to balance the load between the task levels.
|
||||
however, the current implementation is very coarse in this respect.
|
||||
it advertises all slots to the model handler but a reduced number to the remaining handlers
|
||||
depending on the operation mode.
|
||||
the region handler receives a maximum of 4 slots except in single calculation mode.
|
||||
in single calculation mode, all slots can be used by all handlers.
|
||||
"""
|
||||
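# numeric example (hypothetical): with 8 slaves and a mode other than
# "single", the model handler is offered 8 slots, the scan, symmetry and
# emitter handlers 4 each, and the region handler min(4, 4) = 4.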
super(MscoMaster, self).setup(project)
|
||||
|
||||
logger.debug("master entering setup")
|
||||
self._running_slaves = self._slaves
|
||||
self._idle_ranks = range(1, self._running_slaves + 1)
|
||||
|
||||
self._root_task = CalculationTask()
|
||||
self._root_task.file_root = project.output_file
|
||||
self._root_task.model = project.create_domain().start
|
||||
|
||||
self._model_handler = project.handler_classes['model']()
|
||||
self._scan_handler = project.handler_classes['scan']()
|
||||
self._symmetry_handler = project.handler_classes['symmetry']()
|
||||
self._emitter_handler = project.handler_classes['emitter']()
|
||||
self._region_handler = project.handler_classes['region']()
|
||||
|
||||
self._model_handler.datetime_limit = self.datetime_limit
|
||||
|
||||
slaves_adj = max(self._slaves, 1)
|
||||
self._model_handler.setup(project, slaves_adj)
|
||||
if project.mode != "single":
|
||||
slaves_adj = max(slaves_adj // 2, 1)  # floor division: true division is in effect via the __future__ import
|
||||
self._scan_handler.setup(project, slaves_adj)
|
||||
self._symmetry_handler.setup(project, slaves_adj)
|
||||
self._emitter_handler.setup(project, slaves_adj)
|
||||
if project.mode != "single":
|
||||
slaves_adj = min(slaves_adj, 4)
|
||||
self._region_handler.setup(project, slaves_adj)
|
||||
|
||||
def run(self):
|
||||
"""
|
||||
main loop.
|
||||
|
||||
calls slaves, accepts and dispatches results.
|
||||
|
||||
setup() must be called before, cleanup() after.
|
||||
"""
|
||||
self._running = True
|
||||
self._calculations = 0
|
||||
|
||||
logger.debug("master entering main loop")
|
||||
# main task loop
|
||||
while self._running:
|
||||
logger.debug("new iteration of master main loop")
|
||||
self._create_tasks()
|
||||
self._dispatch_results()
|
||||
if self._finishing:
|
||||
self._dispatch_finish()
|
||||
else:
|
||||
self._dispatch_tasks()
|
||||
self._receive_result()
|
||||
self._check_finish()
|
||||
|
||||
logger.debug("master exiting main loop")
|
||||
self._running = False
|
||||
|
||||
def cleanup(self):
|
||||
logger.debug("master entering cleanup")
|
||||
self._region_handler.cleanup()
|
||||
self._emitter_handler.cleanup()
|
||||
self._symmetry_handler.cleanup()
|
||||
self._scan_handler.cleanup()
|
||||
self._model_handler.cleanup()
|
||||
super(MscoMaster, self).cleanup()
|
||||
|
||||
def _dispatch_results(self):
|
||||
"""
|
||||
pass results through the post-processing modules.
|
||||
"""
|
||||
logger.debug("dispatching results of %u tasks", len(self._complete_tasks))
|
||||
while self._complete_tasks:
|
||||
__, task = self._complete_tasks.popitem(last=False)
|
||||
|
||||
logger.debug("passing task %s to region handler", str(task.id))
|
||||
task = self._region_handler.add_result(task)
|
||||
|
||||
if task:
|
||||
logger.debug("passing task %s to emitter handler", str(task.id))
|
||||
task = self._emitter_handler.add_result(task)
|
||||
|
||||
if task:
|
||||
logger.debug("passing task %s to symmetry handler", str(task.id))
|
||||
task = self._symmetry_handler.add_result(task)
|
||||
|
||||
if task:
|
||||
logger.debug("passing task %s to scan handler", str(task.id))
|
||||
task = self._scan_handler.add_result(task)
|
||||
|
||||
if task:
|
||||
logger.debug("passing task %s to model handler", str(task.id))
|
||||
task = self._model_handler.add_result(task)
|
||||
|
||||
if task:
|
||||
logger.debug("root task %s complete", str(task.id))
|
||||
self._finishing = True
|
||||
|
||||
def _create_tasks(self):
|
||||
"""
|
||||
have the model handler generate the next round of top-level calculation tasks.
|
||||
|
||||
the method calls the model handler repeatedly
|
||||
until the pending tasks queue is filled up
|
||||
to more than the minimum queue length.
|
||||
|
||||
@return: None
|
||||
"""
|
||||
logger.debug("creating new tasks from root")
|
||||
while len(self._pending_tasks) < self._min_queue_len:
|
||||
tasks = self._model_handler.create_tasks(self._root_task)
|
||||
logger.debug("model handler returned %u new tasks", len(tasks))
|
||||
if not tasks:
|
||||
self._model_done = True
|
||||
break
|
||||
for task in tasks:
|
||||
self.add_model_task(task)
|
||||
|
||||
def _dispatch_tasks(self):
|
||||
"""
|
||||
send pending tasks to available slaves or master.
|
||||
|
||||
if there is only one process, the master executes one task, and returns.
|
||||
"""
|
||||
logger.debug("dispatching tasks to calculators")
|
||||
if self._slaves > 0:
|
||||
while not self._finishing:
|
||||
try:
|
||||
rank = self._idle_ranks.pop(0)
|
||||
except IndexError:
|
||||
break
|
||||
|
||||
try:
|
||||
__, task = self._pending_tasks.popitem(last=False)
|
||||
except KeyError:
|
||||
self._idle_ranks.append(rank)
|
||||
break
|
||||
else:
|
||||
logger.debug("assigning task %s to rank %u", str(task.id), rank)
|
||||
self._running_tasks[task.id] = task
|
||||
self._comm.send(task.get_mpi_message(), dest=rank, tag=TAG_NEW_TASK)
|
||||
self._calculations += 1
|
||||
else:
|
||||
if not self._finishing:
|
||||
try:
|
||||
__, task = self._pending_tasks.popitem(last=False)
|
||||
except KeyError:
|
||||
pass
|
||||
else:
|
||||
logger.debug("executing task %s in master process", str(task.id))
|
||||
self.calc(task)
|
||||
self._calculations += 1
|
||||
self._complete_tasks[task.id] = task
|
||||
|
||||
def _dispatch_finish(self):
|
||||
"""
|
||||
send all slave ranks a finish message.
|
||||
"""
|
||||
logger.debug("dispatch finish message to %u slaves", len(self._idle_ranks))
|
||||
while self._idle_ranks:
|
||||
rank = self._idle_ranks.pop()
|
||||
logger.debug("send finish tag to rank %u", rank)
|
||||
self._comm.send(None, dest=rank, tag=TAG_FINISH)
|
||||
self._running_slaves -= 1
|
||||
|
||||
def _receive_result(self):
|
||||
"""
|
||||
wait for a message from another rank and process it.
|
||||
"""
|
||||
if self._running_slaves > 0:
|
||||
logger.debug("waiting for calculation result")
|
||||
s = MPI.Status()
|
||||
data = self._comm.recv(source=MPI.ANY_SOURCE, tag=MPI.ANY_TAG, status=s)
|
||||
|
||||
if s.tag == TAG_NEW_RESULT:
|
||||
task_id = self._accept_task_done(data)
|
||||
self._idle_ranks.append(s.source)
|
||||
logger.debug(BMsg("received result of task {0} from rank {1}", task_id, s.source))
|
||||
elif s.tag == TAG_INVALID_RESULT:
|
||||
task_id = self._accept_task_done(data)
|
||||
self._idle_ranks.append(s.source)
|
||||
logger.error(BMsg("received invalid result of task {0} from rank {1}", task_id, s.source))
|
||||
elif s.tag == TAG_ERROR_ABORTING:
|
||||
self._finishing = True
|
||||
self._running_slaves -= 1
|
||||
task_id = self._accept_task_done(data)
|
||||
logger.error(BMsg("received abort signal from rank {1}", task_id, s.source))
|
||||
|
||||
def _accept_task_done(self, data):
|
||||
"""
|
||||
check the return message from a slave process and mark the task done.
|
||||
|
||||
if the message contains the complete data of a running task, the task is moved to the list of complete tasks and its ID is returned.
|
||||
|
||||
@param data: a dictionary that can be imported into a CalculationTask object by the set_mpi_message() method.
|
||||
|
||||
@return: task ID (CalcID type) if the message contains the complete identification of a pending task,
|
||||
None if the ID cannot be determined or is not in the list of running tasks.
|
||||
"""
|
||||
try:
|
||||
task = CalculationTask()
|
||||
task.set_mpi_message(data)
|
||||
del self._running_tasks[task.id]
|
||||
self._complete_tasks[task.id] = task
|
||||
task_id = task.id
|
||||
except (TypeError, IndexError, KeyError):
|
||||
task_id = None
|
||||
|
||||
return task_id
|
||||
|
||||
def _check_finish(self):
|
||||
"""
|
||||
check whether the task loop is finished.
|
||||
|
||||
the task loop is finished on any of the following conditions:
|
||||
* there are no pending or running tasks,
|
||||
* a file named "finish_pmsco" exists in the working directory,
|
||||
* a SIGUSR1, SIGUSR2, or SIGTERM signal was received,
|
||||
* self.datetime_limit is exceeded, or
|
||||
* self.max_calculations is exceeded.
|
||||
|
||||
self._finishing is set if any of these conditions is fulfilled.
|
||||
|
||||
self._running is reset if self._finishing is set and no calculation tasks are running.
|
||||
|
||||
@return: self._finishing
|
||||
"""
|
||||
if not self._finishing and (self._model_done and not self._pending_tasks and not self._running_tasks):
|
||||
logger.info("finish: model handler is done")
|
||||
self._finishing = True
|
||||
if not self._finishing and (self._calculations >= self.max_calculations):
|
||||
logger.warning("finish: max. calculations (%u) exeeded", self.max_calculations)
|
||||
self._finishing = True
|
||||
if not self._finishing and self.stop_signal:
|
||||
logger.info("finish: stop signal received")
|
||||
self._finishing = True
|
||||
if not self._finishing and (datetime.datetime.now() > self.datetime_limit):
|
||||
logger.warning("finish: time limit exceeded")
|
||||
self._finishing = True
|
||||
if not self._finishing and os.path.isfile("finish_pmsco"):
|
||||
logger.info("finish: finish_pmsco file detected")
|
||||
self._finishing = True
|
||||
|
||||
if self._finishing and not self._running_slaves and not self._running_tasks:
|
||||
logger.info("finish: all calculations finished")
|
||||
self._running = False
|
||||
|
||||
return self._finishing
|
||||
|
||||
def add_model_task(self, task):
|
||||
"""
|
||||
add a new model task including all of its children to the task queue.
|
||||
|
||||
@param task (CalculationTask) task identifier and model parameters.
|
||||
"""
|
||||
|
||||
scan_tasks = self._scan_handler.create_tasks(task)
|
||||
for scan_task in scan_tasks:
|
||||
sym_tasks = self._symmetry_handler.create_tasks(scan_task)
|
||||
for sym_task in sym_tasks:
|
||||
emitter_tasks = self._emitter_handler.create_tasks(sym_task)
|
||||
for emitter_task in emitter_tasks:
|
||||
region_tasks = self._region_handler.create_tasks(emitter_task)
|
||||
for region_task in region_tasks:
|
||||
self._pending_tasks[region_task.id] = region_task
|
||||
|
||||
|
||||
class MscoSlave(MscoProcess):
|
||||
"""
|
||||
MscoSlave process for MSC calculations.
|
||||
|
||||
This class implements the main loop of a slave (rank > 0) process.
|
||||
It waits for assignments from the master process,
|
||||
and runs one calculation after the other.
|
||||
"""
|
||||
|
||||
## @var _errors
|
||||
# number of errors (exceptions) encountered in calculation tasks.
|
||||
#
|
||||
# typically, a task is aborted when an exception is encountered.
|
||||
|
||||
def __init__(self, comm):
|
||||
super(MscoSlave, self).__init__(comm)
|
||||
self._errors = 0
|
||||
self._max_errors = 5
|
||||
|
||||
def run(self):
|
||||
"""
|
||||
Waits for messages from the master and dispatches tasks.
|
||||
"""
|
||||
logger.debug("slave entering main loop")
|
||||
s = MPI.Status()
|
||||
self._running = True
|
||||
while self._running:
|
||||
logger.debug("waiting for message")
|
||||
data = self._comm.recv(source=0, tag=MPI.ANY_TAG, status=s)
|
||||
if s.tag == TAG_NEW_TASK:
|
||||
logger.debug("received new task")
|
||||
self.accept_task(data)
|
||||
elif s.tag == TAG_FINISH:
|
||||
logger.debug("received finish message")
|
||||
self._running = False
|
||||
|
||||
logger.debug("slave exiting main loop")
|
||||
|
||||
def accept_task(self, data):
|
||||
"""
|
||||
Executes a calculation task and returns the result to the master.
|
||||
|
||||
if a recoverable exception (math, value and key errors) occurs,
|
||||
the method catches the exception but sends a failure message to the master.
|
||||
if exceptions occur repeatedly, the slave aborts and sends an abort message to the master.
|
||||
|
||||
@param data: task message received from MPI.
|
||||
"""
|
||||
task = CalculationTask()
|
||||
task.set_mpi_message(data)
|
||||
logger.debug(BMsg("executing task {0} in slave process", task.id))
|
||||
try:
|
||||
result = self.calc(task)
|
||||
self._errors = 0
|
||||
except (ValueError, ArithmeticError, LookupError):
|
||||
logger.exception(BMsg("unhandled exception in calculation task {0}", task.id))
|
||||
self._errors += 1
|
||||
if self._errors <= self._max_errors:
|
||||
self._comm.send(data, dest=0, tag=TAG_INVALID_RESULT)
|
||||
else:
|
||||
logger.error("too many exceptions, aborting")
|
||||
self._running = False
|
||||
self._comm.send(data, dest=0, tag=TAG_ERROR_ABORTING)
|
||||
else:
|
||||
logger.debug(BMsg("sending result of task {0} to master", result.id))
|
||||
self._comm.send(result.get_mpi_message(), dest=0, tag=TAG_NEW_RESULT)
|
||||
|
||||
|
||||
def run_master(mpi_comm, project):
|
||||
"""
|
||||
initialize and run the master calculation loop.
|
||||
|
||||
a MscoMaster object is created.
|
||||
the MscoMaster executes the calculation loop and dispatches the tasks.
|
||||
|
||||
this function must be called in the MPI rank 0 process only.
|
||||
|
||||
if an unhandled exception occurs, this function aborts the MPI communicator, killing all MPI processes.
|
||||
the caller will not have a chance to handle the exception.
|
||||
|
||||
@param mpi_comm: MPI communicator (mpi4py.MPI.COMM_WORLD).
|
||||
|
||||
@param project: project instance (sub-class of project.Project).
|
||||
"""
|
||||
try:
|
||||
master = MscoMaster(mpi_comm)
|
||||
master.setup(project)
|
||||
master.run()
|
||||
master.cleanup()
|
||||
except (SystemExit, KeyboardInterrupt):
|
||||
mpi_comm.Abort()
|
||||
raise
|
||||
except Exception:
|
||||
logger.exception("unhandled exception in master calculation loop.")
|
||||
mpi_comm.Abort()
|
||||
raise
|
||||
|
||||
|
||||
def run_slave(mpi_comm, project):
|
||||
"""
|
||||
initialize and run the slave calculation loop.
|
||||
|
||||
a MscoSlave object is created.
|
||||
the MscoSlave accepts tasks from rank 0 and runs the calculations.
|
||||
|
||||
this function must be called in MPI rank > 0 processes.
|
||||
|
||||
if an unhandled exception occurs, the slave process terminates.
|
||||
unless it is a SystemExit or KeyboardInterrupt (where we expect that the master also receives the signal),
|
||||
the MPI communicator is aborted, killing all MPI processes.
|
||||
|
||||
@param mpi_comm: MPI communicator (mpi4py.MPI.COMM_WORLD).
|
||||
|
||||
@param project: project instance (sub-class of project.Project).
|
||||
"""
|
||||
try:
|
||||
slave = MscoSlave(mpi_comm)
|
||||
slave.setup(project)
|
||||
slave.run()
|
||||
slave.cleanup()
|
||||
except (SystemExit, KeyboardInterrupt):
|
||||
raise
|
||||
except Exception:
|
||||
logger.exception("unhandled exception in slave calculation loop.")
|
||||
mpi_comm.Abort()
|
||||
raise
|
||||
|
||||
|
||||
def run_calculations(project):
|
||||
"""
|
||||
initialize and run the main calculation loop.
|
||||
|
||||
depending on the MPI rank, the function branches into run_master() (rank 0) or run_slave() (rank > 0).
|
||||
|
||||
@param project: project instance (sub-class of project.Project).
|
||||
"""
|
||||
mpi_comm = MPI.COMM_WORLD
|
||||
mpi_rank = mpi_comm.Get_rank()
|
||||
|
||||
if mpi_rank == 0:
|
||||
logger.debug("MPI rank %u setting up master loop", mpi_rank)
|
||||
run_master(mpi_comm, project)
|
||||
else:
|
||||
logger.debug("MPI rank %u setting up slave loop", mpi_rank)
|
||||
run_slave(mpi_comm, project)
|
||||
3
pmsco/edac/.gitignore
vendored
Normal file
@@ -0,0 +1,3 @@
|
||||
edac.py
|
||||
edac_wrap.cxx
|
||||
revision.py
|
||||
1
pmsco/edac/__init__.py
Normal file
@@ -0,0 +1 @@
|
||||
__author__ = 'muntwiler_m'
|
||||
7
pmsco/edac/edac.i
Normal file
@@ -0,0 +1,7 @@
|
||||
/* EDAC interface for other programs */
|
||||
%module edac
|
||||
%{
|
||||
extern int run_script(char *scriptfile);
|
||||
%}
|
||||
|
||||
extern int run_script(char *scriptfile);
|
||||
130
pmsco/edac/edac_all.patch
Normal file
@@ -0,0 +1,130 @@
|
||||
*** /home/muntwiler_m/mnt/pearl_data/software/edac/edac_all.cpp 2011-04-14 23:38:44.000000000 +0200
|
||||
--- edac_all.cpp 2016-02-11 12:15:45.322049772 +0100
|
||||
***************
|
||||
*** 10117,10122 ****
|
||||
--- 10117,10123 ----
|
||||
void scan_imfp(char *name);
|
||||
void scan_imfp(FILE *fout);
|
||||
numero iimfp_TPP(numero kr);
|
||||
+ numero iimfp_SD(numero kr);
|
||||
numero TPP_rho, TPP_Nv, TPP_Ep, TPP_Eg;
|
||||
numero screening_length;
|
||||
int scattering_so;
|
||||
***************
|
||||
*** 10230,10235 ****
|
||||
--- 10231,10237 ----
|
||||
|
||||
int n_th;
|
||||
int n_fi;
|
||||
+ int n_ang;
|
||||
numero *th, *fi;
|
||||
|
||||
numero *th_out,
|
||||
***************
|
||||
*** 10239,10244 ****
|
||||
--- 10241,10247 ----
|
||||
void free(void);
|
||||
void init_th(numero thi, numero thf, int nth);
|
||||
void init_phi(numero fii, numero fif, int nfi);
|
||||
+ void read_angles(FILE *fin, char *my_file);
|
||||
void init_refraction(
|
||||
numero refraction);
|
||||
void init_transmission(
|
||||
***************
|
||||
*** 12485,12490 ****
|
||||
--- 12488,12494 ----
|
||||
else {
|
||||
kr=sqrt(sqr(calc.k[ik])+2*V0);
|
||||
if(iimfp_flag==0) ki=iimfp.val(kr)/2;
|
||||
+ else if(iimfp_flag==3) ki=iimfp_SD(kr)/2;
|
||||
else ki=iimfp_TPP(kr)/2;
|
||||
set_k(complex(kr,ki));
|
||||
} } else if(calc.k_flag==2) set_k(calc.kc[ik]);
|
||||
***************
|
||||
*** 12507,12512 ****
|
||||
--- 12511,12522 ----
|
||||
numero imfp=E/(TPP_Ep*TPP_Ep*(beta*log(gamma*E)-C/E+D/(E*E)))/a0_au;
|
||||
return 1/imfp;
|
||||
}
|
||||
+ numero propagation::iimfp_SD(numero kr)
|
||||
+ {
|
||||
+ numero E=sqr(kr)/2*au_eV;
|
||||
+ numero imfp = (1.43e3/sqr(E) + 0.54*sqrt(E))/a0_au;
|
||||
+ return 1/imfp;
|
||||
+ }
|
||||
void propagation::scan_imfp(char *name)
|
||||
{
|
||||
FILE *fout=NULL;
|
||||
***************
|
||||
*** 13202,13208 ****
|
||||
}
|
||||
final_state::final_state(void)
|
||||
{
|
||||
! n_th=n_fi=0;
|
||||
n_1=n_2=0;
|
||||
Ylm0_th_flag=Ylm0_fi_flag=0;
|
||||
mesh_flag=0;
|
||||
--- 13212,13218 ----
|
||||
}
|
||||
final_state::final_state(void)
|
||||
{
|
||||
! n_th=n_fi=n_ang=0;
|
||||
n_1=n_2=0;
|
||||
Ylm0_th_flag=Ylm0_fi_flag=0;
|
||||
mesh_flag=0;
|
||||
***************
|
||||
*** 13233,13238 ****
|
||||
--- 13243,13271 ----
|
||||
if(n_fi==1) fi[0]=fii;
|
||||
else for(j=0; j<n_fi; j++) fi[j]=fii+j*(fif-fii)/(n_fi-1);
|
||||
} }
|
||||
+ void final_state::read_angles(FILE *fin, char *my_file)
|
||||
+ {
|
||||
+ FILE *fang; int i, nang;
|
||||
+ if(!strcmpC(my_file,"inline")) fang=fin;
|
||||
+ else fang=open_file(foutput,my_file,"r");
|
||||
+ nang=read_int(fang);
|
||||
+ free_mesh();
|
||||
+ if(nang>1) {
|
||||
+ delete [] th; delete [] th_out; delete [] transmission; delete [] fi;
|
||||
+ n_th=nang;
|
||||
+ th=new numero [n_th];
|
||||
+ th_out=new numero [n_th];
|
||||
+ transmission=new numero [n_th];
|
||||
+ n_fi=nang;
|
||||
+ fi=new numero [n_fi];
|
||||
+ for(i=0; i<nang; i++) {
|
||||
+ th[i]=th_out[i]=read_numero(fang);
|
||||
+ transmission[i]=1;
|
||||
+ fi[i]=read_numero(fang);
|
||||
+ }
|
||||
+ }
|
||||
+ if(strcmpC(my_file,"inline")) fclose(fang);
|
||||
+ }
|
||||
void final_state::init_refraction(numero refraction)
|
||||
{
|
||||
int i;
|
||||
***************
|
||||
*** 14743,14748 ****
|
||||
--- 14776,14783 ----
|
||||
|| scat.TPP_Ep<=0 || scat.TPP_Eg<0)
|
||||
on_error(foutput,"(input) imfp TPP-2M", "wrong parameters");
|
||||
scat.iimfp_flag=1;
|
||||
+ } else if(!strcmpC(name,"SD-UC")) {
|
||||
+ scat.iimfp_flag=3;
|
||||
} else {
|
||||
scat.read_imfp(fprog,name);
|
||||
scat.iimfp_flag=0;
|
||||
***************
|
||||
*** 15162,15164 ****
|
||||
--- 15197,15206 ----
|
||||
fprintf(foutput,"That's all, folks!\n");
|
||||
return 0;
|
||||
}
|
||||
+ int run_script(char *scriptfile)
|
||||
+ {
|
||||
+ particle_type=electrones;
|
||||
+ init_fact();
|
||||
+ electron.program(scriptfile);
|
||||
+ return 0;
|
||||
+ }
|
||||
52
pmsco/edac/makefile
Normal file
@@ -0,0 +1,52 @@
|
||||
SHELL=/bin/sh
|
||||
|
||||
# makefile for EDAC program and module
|
||||
#
|
||||
# the EDAC source code is not included in the public distribution.
|
||||
# please obtain it from the original author,
|
||||
# copy it to this directory,
|
||||
# and apply the edac_all.patch patch before compilation.
|
||||
#
|
||||
# see the top-level makefile for additional information.
|
||||
|
||||
.SUFFIXES:
|
||||
.SUFFIXES: .c .cpp .cxx .exe .f .h .i .o .py .pyf .so
|
||||
.PHONY: all clean edac
|
||||
|
||||
FC=gfortran
|
||||
FCCOPTS=
|
||||
F2PY=f2py
|
||||
F2PYOPTS=
|
||||
CC=g++
|
||||
CCOPTS=-Wno-write-strings
|
||||
SWIG=swig
|
||||
SWIGOPTS=
|
||||
PYTHON=python
|
||||
PYTHONOPTS=
|
||||
|
||||
all: edac
|
||||
|
||||
edac: edac.exe _edac.so edac.py
|
||||
|
||||
edac.exe: edac_all.cpp
|
||||
$(CC) $(CCOPTS) -o edac.exe edac_all.cpp
|
||||
|
||||
edac_wrap.cxx: edac_all.cpp edac.i
|
||||
$(SWIG) $(SWIGOPTS) -c++ -python edac.i
|
||||
|
||||
edac.py _edac.so: edac_wrap.cxx setup.py
|
||||
$(PYTHON) $(PYTHONOPTS) setup.py build_ext --inplace
|
||||
|
||||
revision.py: _edac.so
|
||||
git log --pretty=format:"code_rev = 'Code revision %h, %ad'" --date=iso -1 > $@ || echo "code_rev = 'Code revision unknown, "`date +"%F %T %z"`"'" > $@
|
||||
echo "" >> revision.py
|
||||
|
||||
revision.txt: _edac.so edac.exe
|
||||
git log --pretty=format:"Code revision %h, %ad" --date=iso -1 > $@ || echo "Code revision unknown, "`date +"%F %T %z"` > $@
|
||||
echo "" >> revision.txt
|
||||
|
||||
clean:
|
||||
rm -f *.so *.o *.exe
|
||||
rm -f *_wrap.cxx
|
||||
rm -f revision.py
|
||||
rm -f revision.txt
|
||||
20
pmsco/edac/setup.py
Normal file
@@ -0,0 +1,20 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
"""
|
||||
setup.py file for EDAC
|
||||
"""
|
||||
|
||||
from distutils.core import setup, Extension
|
||||
|
||||
|
||||
edac_module = Extension('_edac',
|
||||
sources=['edac_wrap.cxx', 'edac_all.cpp'],
|
||||
)
|
||||
|
||||
setup (name = 'edac',
|
||||
version = '0.1',
|
||||
author = "Matthias Muntwiler",
|
||||
description = """EDAC module in Python""",
|
||||
ext_modules = [edac_module],
|
||||
py_modules = ["edac"], requires=['numpy']
|
||||
)
|
||||
223
pmsco/edac_calculator.py
Normal file
@@ -0,0 +1,223 @@
|
||||
"""
|
||||
@package pmsco.edac_calculator
|
||||
Garcia de Abajo EDAC program interface.
|
||||
|
||||
@author Matthias Muntwiler
|
||||
|
||||
@copyright (c) 2015 by Paul Scherrer Institut @n
|
||||
Licensed under the Apache License, Version 2.0 (the "License"); @n
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
"""
|
||||
|
||||
from __future__ import division
|
||||
import os
|
||||
import logging
|
||||
import math
|
||||
import numpy as np
|
||||
import calculator
|
||||
import data as md
|
||||
import cluster as mc
|
||||
import edac.edac as edac
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class EdacCalculator(calculator.Calculator):
|
||||
def write_input_file(self, params, scan, filepath):
|
||||
"""
|
||||
write parameters to an EDAC input file
|
||||
|
||||
EDAC will calculate results on a rectangular grid.
|
||||
the grid is constructed from the limits of the scan coordinates
|
||||
and the number of positions in each respective dimension.
|
||||
to avoid any confusion, the input scan should be rectangular with equidistant steps.
|
||||
|
||||
the following scans are supported:
|
||||
(energy), (energy, theta), (energy, phi), (energy, alpha), (theta, phi) holo.
|
||||
except for the holo scan, each scan dimension must be linear.
|
||||
the holo scan is translated to a rectangular (theta, phi) scan
|
||||
where theta is copied
|
||||
and phi is replaced by a linear scan from the minimum to the maximum phi at 1 degree steps.
|
||||
the scan type is detected from the scan file.
|
||||
|
||||
if alpha is defined, theta is implicitly set to normal emission! (to be generalized)
|
||||
|
||||
TODO: some parameters are still hard-coded.
|
||||
"""
|
||||
with open(filepath, "w") as f:
|
||||
f.write("verbose off\n")
|
||||
f.write("cluster input %s\n" % (params.cluster_file))
|
||||
f.write("emitters %u l(A)\n" % (len(params.emitters)))
|
||||
for em in params.emitters:
|
||||
f.write("%g %g %g %u\n" % em)
|
||||
#for iat in range(params.atom_types):
|
||||
#pf = params.phase_file[iat]
|
||||
#pf = pf.replace(".pha", ".edac.pha")
|
||||
#f.write("scatterer %u %s\n" % (params.atomic_number[iat], pf))
|
||||
|
||||
en = scan.energies + params.work_function
|
||||
en_min = en.min()
|
||||
en_max = en.max()
|
||||
if en.shape[0] <= 1:
|
||||
en_num = 1
|
||||
else:
|
||||
de = np.diff(en)
|
||||
de = de[de >= 0.01]
|
||||
de = de.min()
|
||||
en_num = int(round((en_max - en_min) / de)) + 1
|
||||
if en_num != en.shape[0]:
|
||||
logger.warning("energy scan length mismatch: EDAC {0}, scan {1}".format(en_num, en.shape[0]))
|
||||
assert en_num < en.shape[0] * 10, \
|
||||
"linearization of energy scan causes excessive oversampling {0}/{1}".format(en_num, en.shape[0])
|
||||
f.write("emission energy E(eV) {en0:f} {en1:f} {nen:d}\n".format(en0=en_min, en1=en_max, nen=en_num))
|
||||
|
||||
if params.fixed_cluster:
|
||||
th = scan.alphas
|
||||
ph = np.remainder(scan.phis + 90.0, 360.0)
|
||||
f.write("fixed cluster\n")
|
||||
if np.abs(scan.thetas).max() > 0.0:
|
||||
logger.warning("theta angle implicitly set to zero due to alpha scan.")
|
||||
else:
|
||||
th = np.unique(scan.thetas)
|
||||
ph = scan.phis
|
||||
f.write("movable cluster\n")
|
||||
|
||||
th_min = th.min()
|
||||
th_max = th.max()
|
||||
if th.shape[0] <= 1:
|
||||
th_num = 1
|
||||
else:
|
||||
dt = np.diff(th)
|
||||
dt = dt[dt >= 0.1]
|
||||
dt = dt.min()
|
||||
if ph.shape[0] > 1:
|
||||
# hemispherical scan
|
||||
if th_min < 0:
|
||||
th_min = max(th_min - dt, -90.0)
|
||||
else:
|
||||
th_min = max(th_min - dt, 0.0)
|
||||
if th_max > 0:
|
||||
th_max = min(th_max + dt, 90.0)
|
||||
else:
|
||||
th_max = min(th_max + dt, 0.0)
|
||||
th_num = int(round((th_max - th_min) / dt)) + 1
|
||||
assert th_num < th.shape[0] * 10, \
|
||||
"linearization of theta scan causes excessive oversampling {0}/{1}".format(th_num, th.shape[0])
|
||||
|
||||
f.write("beta {0}\n".format(params.polar_incidence_angle, params.azimuthal_incidence_angle))
|
||||
f.write("incidence {0} {1}\n".format(params.polar_incidence_angle, params.azimuthal_incidence_angle))
|
||||
f.write("emission angle theta {th0:f} {th1:f} {nth:d}\n".format(th0=th_min, th1=th_max, nth=th_num))
|
||||
|
||||
ph_min = ph.min()
|
||||
ph_max = ph.max()
|
||||
if th.shape[0] <= 1:
|
||||
# azimuthal scan
|
||||
ph_num = ph.shape[0]
|
||||
elif ph.shape[0] <= 1:
|
||||
# polar scan
|
||||
ph_num = 1
|
||||
else:
|
||||
# hemispherical scan
|
||||
dp = np.diff(ph)
|
||||
dp = dp[dp >= 0.1]
|
||||
dp = dp.min()
|
||||
ph_min = max(ph_min - dp, 0.0)
|
||||
ph_max = min(ph_max + dp, 360.0)
|
||||
dt = (th_max - th_min) / (th_num - 1)
|
||||
dp = min(dp, dt)
|
||||
ph_num = int(round((ph_max - ph_min) / dp)) + 1
|
||||
assert ph_num < ph.shape[0] * 10, \
|
||||
"linearization of phi scan causes excessive oversampling {0}/{1}".format(ph_num, ph.shape[0])
|
||||
|
||||
f.write("emission angle phi {ph0:f} {ph1:f} {nph:d}\n".format(ph0=ph_min, ph1=ph_max, nph=ph_num))
|
||||
|
||||
f.write("initial state {0}\n".format(params.initial_state))
|
||||
polarizations = {'H': 'LPx', 'V': 'LPy', 'L': 'LCP', 'R': 'RCP'}
|
||||
f.write("polarization {0}\n".format(polarizations[params.polarization]))
|
||||
f.write("muffin-tin\n")
|
||||
f.write("V0 E(eV) {0}\n".format(params.inner_potential))
|
||||
f.write("cluster surface l(A) {0}\n".format(params.z_surface))
|
||||
f.write("imfp SD-UC\n")
|
||||
f.write("temperature %g %g\n" % (params.experiment_temperature, params.debye_temperature))
|
||||
f.write("iteration recursion\n")
|
||||
f.write("dmax l(A) %g\n" % (params.dmax))
|
||||
f.write("lmax %u\n" % (params.lmax))
|
||||
f.write("orders %u " % (len(params.orders)))
|
||||
for order in params.orders:
|
||||
f.write("%u " % (order))
|
||||
f.write("\n")
|
||||
f.write("emission angle window 1\n")
|
||||
f.write("scan pd %s\n" % (params.output_file))
|
||||
f.write("end\n")
|
||||
|
||||
    def run(self, params, cluster, scan, output_file):
        """
        run EDAC with the given parameters and cluster.

        @param params: a pmsco.project.Params() object with all necessary values except cluster and output files set.

        @param cluster: a pmsco.cluster.Cluster(format=FMT_EDAC) object with all atom positions set.

        @param scan: a pmsco.project.Scan() object describing the experimental scanning scheme.

        @param output_file: base name for all intermediate and output files

        @return: result_file, files_cats
        """

        # set up scan
        params.fixed_cluster = 'a' in scan.mode

        # generate file names
        base_filename = output_file
        clu_filename = base_filename + ".clu"
        out_filename = base_filename + ".out"
        par_filename = base_filename + ".par"
        dat_filename = out_filename
        if params.fixed_cluster:
            etpi_filename = base_filename + ".etpai"
        else:
            etpi_filename = base_filename + ".etpi"

        # fix EDAC particularities
        params.cluster_file = clu_filename
        params.output_file = out_filename
        params.data_file = dat_filename
        params.emitters = cluster.get_emitters()

        # save parameter files
        logger.debug("writing cluster file %s", clu_filename)
        cluster.save_to_file(clu_filename, fmt=mc.FMT_EDAC)
        logger.debug("writing input file %s", par_filename)
        self.write_input_file(params, scan, par_filename)

        # run EDAC
        logger.info("calling EDAC with input file %s", par_filename)
        edac.run_script(par_filename)

        # load results and save in ETPI or ETPAI format
        logger.debug("importing data from output file %s", dat_filename)
        result_etpi = md.load_edac_pd(dat_filename, energy=scan.energies[0] + params.work_function,
                                      theta=scan.thetas[0], phi=scan.phis[0],
                                      fixed_cluster=params.fixed_cluster)
        result_etpi['e'] -= params.work_function

        if 't' in scan.mode and 'p' in scan.mode:
            hemi_tpi = scan.raw_data.copy()
            hemi_tpi['i'] = 0.0
            try:
                hemi_tpi['s'] = 0.0
            except ValueError:
                pass
            result_etpi = md.interpolate_hemi_scan(result_etpi, hemi_tpi)

        if result_etpi.shape[0] != scan.raw_data.shape[0]:
            logger.error("scan length mismatch: EDAC result: %u, scan data: %u",
                         result_etpi.shape[0], scan.raw_data.shape[0])

        logger.debug("save result to file %s", etpi_filename)
        md.save_data(etpi_filename, result_etpi)

        files = {clu_filename: 'input', par_filename: 'input', dat_filename: 'output',
                 etpi_filename: 'region'}
        return etpi_filename, files
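
# note: a minimal usage sketch of this calculator interface, for illustration only.
# the object name is an assumption; in production, run() is invoked by the pmsco
# task dispatcher rather than called directly:
#
#     calc = EdacCalculator()          # hypothetical name of the class defined above
#     result_file, file_cats = calc.run(params, cluster, scan, "out/model0000")
#     # result_file is the .etpi/.etpai data file,
#     # file_cats maps each created file name to its category (input, output, region)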
324
pmsco/files.py
Normal file
@@ -0,0 +1,324 @@
"""
|
||||
@package pmsco.files
|
||||
manage files produced by pmsco.
|
||||
|
||||
@author Matthias Muntwiler
|
||||
|
||||
@copyright (c) 2016 by Paul Scherrer Institut @n
|
||||
Licensed under the Apache License, Version 2.0 (the "License"); @n
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
"""
|
||||
|
||||
import os
|
||||
import logging
|
||||

logger = logging.getLogger(__name__)

## @var FILE_CATEGORIES
# categories of generated files.
#
# these labels are used to decide which output files are kept or deleted after the calculation.
#
# each string of this set marks a category of files.
#
# @arg @c 'input' : raw input files for calculator, including cluster and phase files in custom format
# @arg @c 'output' : raw output files from calculator
# @arg @c 'phase' : phase files in portable format for report
# @arg @c 'cluster' : cluster files in portable XYZ format for report
# @arg @c 'log' : log files
# @arg @c 'debug' : debug files
# @arg @c 'model': output files in ETPAI format: complete simulation (a_-1_-1_-1_-1)
# @arg @c 'scan' : output files in ETPAI format: scan (a_b_-1_-1_-1)
# @arg @c 'symmetry' : output files in ETPAI format: symmetry (a_b_c_-1_-1)
# @arg @c 'emitter' : output files in ETPAI format: emitter (a_b_c_d_-1)
# @arg @c 'region' : output files in ETPAI format: region (a_b_c_d_e)
# @arg @c 'report': final report of results
# @arg @c 'population': final state of particle population
# @arg @c 'rfac': files related to models which give bad r-factors (dynamic category, see below).
#
# @note @c 'rfac' is a dynamic category not connected to a particular file or content type.
# no file should be marked @c 'rfac'.
# the string is used only to specify whether bad models should be deleted or not.
# if so, all files related to bad models are deleted, regardless of their static category.
#
FILE_CATEGORIES = {'cluster', 'phase', 'input', 'output',
                   'report', 'region', 'emitter', 'scan', 'symmetry', 'model',
                   'log', 'debug', 'population', 'rfac'}

## @var FILE_CATEGORIES_TO_KEEP
# categories of files to be kept.
#
# this constant defines the default set of file categories that are kept after the calculation.
#
FILE_CATEGORIES_TO_KEEP = {'cluster', 'model', 'report', 'population'}

## @var FILE_CATEGORIES_TO_DELETE
# categories of files to be deleted.
#
# this constant defines the default set of file categories that are deleted after the calculation.
# it contains all values from FILE_CATEGORIES minus FILE_CATEGORIES_TO_KEEP.
# it is used to initialize Project.files_to_delete.
#
FILE_CATEGORIES_TO_DELETE = FILE_CATEGORIES - FILE_CATEGORIES_TO_KEEP


class FileTracker(object):
    """
    organize output files of calculations.

    the file manager stores references to data files generated during calculations
    and cleans up unused files according to a range of filter criteria.
    """

    ## @var categories_to_delete (set)
    # categories of generated files that should be deleted after the calculation.
    #
    # each string of this set marks a category of files to be deleted.
    # the complete set of recognized categories is files.FILE_CATEGORIES.
    # the default setting after initialization is files.FILE_CATEGORIES_TO_DELETE.
    #
    # in optimization modes, an output file is kept only
    # if its model produced one of the best R-factors and
    # its category is not listed in this set.
    # all other (bad R-factor) files are deleted regardless of their category.

    ## @var keep_rfac (int)
    # number of best models to keep.
    #
    # if @c 'rfac' is set in categories_to_delete, all files of bad models (regardless of their category) are deleted.
    # this parameter specifies how many of the best models are kept.
    #
    # the default is 10.

    ## @var _last_id (int)
    # last used file identification number (incremental)

    ## @var _path_by_id (dict)
    # key = file id, value = file path

    ## @var _model_by_id (dict)
    # key = file id, value = model number

    ## @var _category_by_id (dict)
    # key = file id, value = category (str)

    ## @var _rfac_by_model (dict)
    # key = model number, value = R-factor

    ## @var _complete_by_model (dict)
    # key = model number, value (boolean) = all calculations complete, files can be deleted

    def __init__(self):
        self._id_by_path = {}
        self._path_by_id = {}
        self._model_by_id = {}
        self._category_by_id = {}
        self._rfac_by_model = {}
        self._complete_by_model = {}
        self._last_id = 0
        self.categories_to_delete = FILE_CATEGORIES_TO_DELETE
        self.keep_rfac = 10

    def add_file(self, path, model, category='default'):
        """
        add a new data file to the list.

        @param path: (str) system path of the file relative to the working directory.

        @param model: (int) model number

        @param category: (str) file category, e.g. 'output', etc.

        @return: None
        """
        self._last_id += 1
        _id = self._last_id
        self._id_by_path[path] = _id
        self._path_by_id[_id] = path
        self._model_by_id[_id] = model
        self._category_by_id[_id] = category

    def rename_file(self, old_path, new_path):
        """
        rename a data file in the list.

        the method does not rename the file in the file system.

        @param old_path: must match an existing file path identically.
            if old_path is not in the list, the method does nothing.

        @param new_path: new path.

        @return: None
        """
        try:
            _id = self._id_by_path[old_path]
        except KeyError:
            pass
        else:
            del self._id_by_path[old_path]
            self._id_by_path[new_path] = _id
            self._path_by_id[_id] = new_path

    def remove_file(self, path):
        """
        remove a file from the list.

        the method does not delete the file from the file system.

        @param path: must match an existing file path identically.
            if path is not in the list, the method does nothing.

        @return: None
        """
        try:
            _id = self._id_by_path[path]
        except KeyError:
            pass
        else:
            del self._id_by_path[path]
            del self._path_by_id[_id]
            del self._model_by_id[_id]
            del self._category_by_id[_id]

    def update_model_rfac(self, model, rfac):
        """
        update the stored R factors of all files that depend on a specified model.
        the model handler should call this method if files with bad R factors should be deleted.
        by default (after adding files of a new model), the R factor is unset and
        delete_bad_rfac() will not act on that model.

        @param model: (int) model number.
        @param rfac: (float) new R factor
        @return: None
        """
        self._rfac_by_model[model] = rfac

    def set_model_complete(self, model, complete):
        """
        specify whether the calculations of a model are complete and its files can be deleted.
        the model handler must set this flag.
        by default (after adding files of a new model), it is False.

        @param model: (int) model number.
        @param complete: (bool) True if all calculations of the model are complete (files can be deleted).
        @return: None
        """
        self._complete_by_model[model] = complete

    def delete_files(self, categories=None, keep_rfac=0):
        """
        delete the files matching the list of categories.

        @param categories: set of file categories to delete.
            may include 'rfac' if bad r-factors should be deleted additionally (regardless of static category).
            defaults to self.categories_to_delete.

        @param keep_rfac: number of best models to keep if bad r-factors are to be deleted.
            the effective keep number is the greater of self.keep_rfac and this argument.

        @return: None
        """
        if categories is None:
            categories = self.categories_to_delete
        for cat in categories:
            self.delete_category(cat)
        if 'rfac' in categories:
            self.delete_bad_rfac(keep=keep_rfac)

    def delete_bad_rfac(self, keep=0, force_delete=False):
        """
        delete the files of all models except a specified number of good models.

        the method first determines which models to keep.
        models with R factor values of 0.0, without a specified R-factor, and
        the specified number of best ranking non-zero models are kept.
        the files belonging to the keeper models are kept, all others are deleted,
        regardless of category.
        files of incomplete models are also kept.

        the files are deleted from the list and the file system.

        files are deleted only if 'rfac' is specified in self.categories_to_delete
        or if force_delete is set to True.
        otherwise the method does nothing.

        @param keep: number of best models to keep.
            the effective keep number is the greater of self.keep_rfac and this argument.

        @param force_delete: delete the bad files even if 'rfac' is not selected in categories_to_delete.

        @return: None

        @todo should clean up rfac and model dictionaries from time to time.
        """
        if force_delete or 'rfac' in self.categories_to_delete:
            keep = max(keep, self.keep_rfac)
            rfacs = [r for r in sorted(self._rfac_by_model.values()) if r > 0.0]
            try:
                rfac_split = rfacs[keep-1]
            except IndexError:
                return

            complete_models = {_model for (_model, _complete) in self._complete_by_model.iteritems() if _complete}
            del_models = {_model for (_model, _rfac) in self._rfac_by_model.iteritems() if _rfac > rfac_split}
            del_models &= complete_models
            del_ids = {_id for (_id, _model) in self._model_by_id.iteritems() if _model in del_models}
            for _id in del_ids:
                self.delete_file(_id)

    def delete_category(self, category):
        """
        delete all files of a specified category from the list and the file system.

        only files of complete models (cf. set_model_complete()) are deleted, but regardless of R-factor.

        @param category: (str) category.

        @return: None
        """
        complete_models = {_model for (_model, _complete) in self._complete_by_model.iteritems() if _complete}
        del_ids = {_id for (_id, cat) in self._category_by_id.iteritems() if cat == category}
        del_ids &= {_id for (_id, _model) in self._model_by_id.iteritems() if _model in complete_models}
        for _id in del_ids:
            self.delete_file(_id)

    def delete_file(self, _id):
        """
        delete a specified file from the list and the file system.

        the file is identified by ID number.
        this method is unconditional. it does not consider category, completeness, nor R-factor.

        @param _id: (int) ID number of the file to delete.

        @return: None
        """
        path = self._path_by_id[_id]
        cat = self._category_by_id[_id]
        model = self._model_by_id[_id]
        del self._id_by_path[path]
        del self._path_by_id[_id]
        del self._model_by_id[_id]
        del self._category_by_id[_id]
        try:
            self._os_delete_file(path)
        except OSError:
            logger.warning("error deleting file {0}".format(path))
        else:
            logger.debug("delete file {0} ({1}, model {2})".format(path, cat, model))

    @staticmethod
    def _os_delete_file(path):
        """
        have the operating system delete a file path.

        this function is separate so that we can mock it in unit tests.

        @param path: OS path
        @return: None
        """
        os.remove(path)
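
# note: a short sketch of the FileTracker life cycle, for illustration only
# (file names and the R-factor value are made up):
#
#     tracker = FileTracker()
#     tracker.add_file("model0.etpi", model=0, category='region')
#     tracker.add_file("model0.clu", model=0, category='input')
#     tracker.update_model_rfac(0, 0.45)
#     tracker.set_model_complete(0, True)
#     tracker.delete_files()    # deletes by category; ranks by R-factor if 'rfac' is selected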
280
pmsco/gradient.py
Normal file
@@ -0,0 +1,280 @@
"""
|
||||
gradient optimization module for MSC calculations
|
||||
|
||||
the module starts multiple MSC calculations and optimizes the model parameters
|
||||
with a gradient search.
|
||||
|
||||
the optimization task is distributed over multiple processes using MPI.
|
||||
the optimization must be started with N+1 processes in the MPI environment,
|
||||
where N equals the number of fit parameters.
|
||||
|
||||
IMPLEMENTATION IN PROGRESS - DEBUGGING
|
||||
|
||||
Requires: scipy, numpy
|
||||
|
||||
Author: Matthias Muntwiler
|
||||
|
||||
Copyright (c) 2015 by Paul Scherrer Institut
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
"""
|
||||
|
||||
import numpy as np
|
||||
import scipy.optimize as so
|
||||
import data as md
|
||||
from mpi4py import MPI
|
||||
|
||||
# messages sent from master to slaves
|
||||
|
||||
# master sends new assignment
|
||||
# the message is a dictionary of model parameters
|
||||
TAG_NEW_TASK = 1
|
||||
# master calls end of calculation
|
||||
# the message is empty
|
||||
TAG_FINISH = 2
|
||||
# master sends current population
|
||||
# currently not used
|
||||
TAG_POPULATION = 3  # distinct from TAG_FINISH so that the tag remains unambiguous

# messages sent from slaves to master
# slave reports new result
# the message is a dictionary of model parameters and results
TAG_NEW_RESULT = 1
# slave confirms end of calculation
# currently not used
TAG_FINISHED = 2

class MscProcess(object):
    """
    Code shared by MscMaster and MscSlave
"""
|
||||
def __init__(self, comm):
|
||||
self.comm = comm
|
||||
|
||||
def setup(self, project):
|
||||
self.project = project
|
||||
self.running = False
|
||||
self.finishing = False
|
||||
self.iteration = 0
|
||||
|
||||
def run(self):
|
||||
pass
|
||||
|
||||
def cleanup(self):
|
||||
pass
|
||||
|
||||
def calc(self, pars):
|
||||
"""
|
||||
Executes a single MSC calculation.
|
||||
|
||||
pars: A dictionary of parameters expected by the cluster and parameters functions.
|
||||
|
||||
returns: pars with three additional values:
|
||||
rank: rank of the calculation process
|
||||
            iter: iteration index of the calculation process
            rfac: resulting R-factor

        all other calculation results are discarded.
        """
        rev = "rank %u, iteration %u" % (self.comm.rank, self.iteration)

        # create parameter and cluster structures
        clu = self.project.create_cluster(pars)
        par = self.project.create_params(pars)

        # generate file names
        base_filename = "%s_%u_%u" % (self.project.output_file, self.comm.rank, self.iteration)

        # call the msc program
        result_etpi = self.project.run_calc(par, clu, self.project.scan_file, base_filename, delete_files=True)

        # calculate modulation function and R-factor
        result_etpi = md.calc_modfunc_lowess(result_etpi)
        result_r = md.rfactor(self.project.scan_modf, result_etpi)

        pars['rank'] = self.comm.rank
        pars['iter'] = self.iteration
        pars['rfac'] = result_r

        return pars

class MscMaster(MscProcess):
    def __init__(self, comm):
        super(MscMaster, self).__init__(comm)
        self.slaves = self.comm.Get_size() - 1
        self.running_slaves = 0

    def setup(self, project):
        super(MscMaster, self).setup(project)
        self.dom = project.create_domain()
        self.running_slaves = self.slaves

        self._outfile = open(self.project.output_file + ".dat", "w")
        self._outfile.write("#")
        self._outfile_keys = self.dom.start.keys()
        self._outfile_keys.append('rfac')
        for name in self._outfile_keys:
            self._outfile.write(" " + name)
        self._outfile.write("\n")

    def run(self):
        """
        starts the minimization
        """
        # pack initial guess, bounds, constant parameters
        nparams = len(self.dom.start)
        fit_params = np.zeros(nparams)
        params_index = {}
        const_params = self.dom.max.copy()
        bounds = []
        n_fit_params = 0
        for key in self.dom.start:
            if self.dom.max[key] > self.dom.min[key]:
                fit_params[n_fit_params] = self.dom.start[key]
                params_index[key] = n_fit_params
                n_fit_params += 1
                bounds.append((self.dom.min[key], self.dom.max[key]))
        fit_params.resize(n_fit_params)

        fit_result = so.minimize(self._minfunc, fit_params,
                                 args=(params_index, const_params),
                                 method='L-BFGS-B', jac=True, bounds=bounds)

        msc_result = const_params.copy()
        for key, index in params_index.items():
            msc_result[key] = fit_result.x[index]
        msc_result['rfac'] = fit_result.fun

        self._outfile.write("# result of gradient optimization\n")
        self._outfile.write("# success = {0}, iterations = {1}, calculations = {2}\n".format(
            fit_result.success, fit_result.nit, fit_result.nfev))
        self._outfile.write("# message: {0}\n".format(fit_result.message))
        for name in self._outfile_keys:
            self._outfile.write(" " + str(msc_result[name]))
        self._outfile.write("\n")

    def _minfunc(self, fit_params, params_index, const_params):
        """
        function to be minimized

        fit_params (numpy.ndarray): current fit position
        params_index (dict): dictionary of fit parameters
            and their index in fit_params.
            key=MSC parameter name, value=index to fit_params.
        const_params (dict): dictionary of MSC parameters
            holding (at least) the constant parameter values.
            a copy of this instance, updated with the current fit position,
            is passed to MSC.
        """

        # unpack parameters
        msc_params = const_params.copy()
        for key, index in params_index.items():
            msc_params[key] = fit_params[index]

        # run MSC calculations
        rfac, jac_dict = self.run_msc_calcs(msc_params, params_index)

        # pack jacobian
        jac_arr = np.zeros_like(fit_params)
        for key, index in params_index.items():
            jac_arr[index] = jac_dict[key]

        return rfac, jac_arr

    def run_msc_calcs(self, params, params_index):
        """
        params: dictionary of actual parameters
        params_index: dictionary of fit parameter indices.
            only the keys are used here
            to decide for which parameters the derivative is calculated.

        returns:
            (float) R-factor at the params location
            (dict) approximate gradient at the params location
        """
        # distribute tasks for gradient
        slave_rank = 1
        for key in params_index:
            params2 = params.copy()
            params2[key] += self.dom.step[key]
            params2['key'] = key
            self.comm.send(params2, dest=slave_rank, tag=TAG_NEW_TASK)
            slave_rank += 1

        # run calculation for actual position
        result0 = self.calc(params)
        for name in self._outfile_keys:
            self._outfile.write(" " + str(result0[name]))
        self._outfile.write("\n")

        # gather results
        s = MPI.Status()
        jacobian = params.copy()
        for slave in range(1, slave_rank):
            result1 = self.comm.recv(source=MPI.ANY_SOURCE, tag=MPI.ANY_TAG, status=s)
            if s.tag == TAG_NEW_RESULT:
                key = result1['key']
                jacobian[key] = (result1['rfac'] - result0['rfac']) / (result1[key] - result0[key])
                for name in self._outfile_keys:
                    self._outfile.write(" " + str(result1[name]))
                self._outfile.write("\n")

        self._outfile.flush()
        return result0['rfac'], jacobian

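    # note on the numerics (added for clarity): run_msc_calcs() estimates the
    # gradient by a one-sided forward difference. for each fit parameter p with
    # step size h taken from the domain,
    #
    #     dR/dp ~ (R(p + h) - R(p)) / h
    #
    # the displaced evaluations R(p + h) run in parallel on the slave ranks while
    # the master computes the reference value R(p) itself.
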
    def cleanup(self):
        """
        cleanup: close output file, terminate slave processes
        """
        self._outfile.close()
        for rank in range(1, self.running_slaves + 1):
            self.comm.send(None, dest=rank, tag=TAG_FINISH)
        super(MscMaster, self).cleanup()

class MscSlave(MscProcess):

    def run(self):
        """
        Waits for messages from the master and dispatches tasks.
        """
        s = MPI.Status()
        self.running = True
        while self.running:
            data = self.comm.recv(source=0, tag=MPI.ANY_TAG, status=s)
            if s.tag == TAG_NEW_TASK:
                self.accept_task(data)
            elif s.tag == TAG_FINISH:
                self.running = False

    def accept_task(self, pars):
        """
        Executes a calculation task and returns the result to the master.
        """
        result = self.calc(pars)
        self.comm.send(result, dest=0, tag=TAG_NEW_RESULT)
        self.iteration += 1

def optimize(project):
    """
    main entry point for optimization

    rank 0: starts the calculation, distributes tasks
    ranks 1...N: work on assignments from rank 0
"""
|
||||
mpi_comm = MPI.COMM_WORLD
|
||||
mpi_rank = mpi_comm.Get_rank()
|
||||
|
||||
if mpi_rank == 0:
|
||||
master = MscMaster(mpi_comm)
|
||||
master.setup(project)
|
||||
master.run()
|
||||
master.cleanup()
|
||||
else:
|
||||
slave = MscSlave(mpi_comm)
|
||||
slave.setup(project)
|
||||
slave.run()
|
||||
slave.cleanup()
|
||||
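
# note: a usage sketch, for illustration only. as stated in the module docstring,
# the job must run with N+1 MPI processes for N fit parameters; the script name
# below is an assumption:
#
#     mpiexec -n 4 python run_gradient.py    # e.g. 3 fit parameters + 1 master
#
# where run_gradient.py builds the project object and calls optimize(project)
# on every rank.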
409
pmsco/grid.py
Normal file
@@ -0,0 +1,409 @@
"""
|
||||
@package pmsco.grid
|
||||
grid search optimization handler.
|
||||
|
||||
the module starts multiple MSC calculations and varies parameters on a fixed coordinate grid.
|
||||
|
||||
@author Matthias Muntwiler, matthias.muntwiler@psi.ch
|
||||
|
||||
@copyright (c) 2015 by Paul Scherrer Institut @n
|
||||
Licensed under the Apache License, Version 2.0 (the "License"); @n
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
"""
|
||||
|
||||
from __future__ import division
|
||||
import copy
|
||||
import os
|
||||
import datetime
|
||||
import numpy as np
|
||||
import logging
|
||||
import handlers
|
||||
from helpers import BraceMessage as BMsg
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class GridPopulation(object):
|
||||
"""
|
||||
grid population.
|
||||
"""
|
||||
|
||||
## @var model_start
|
||||
# (dict) initial model parameters.
|
||||
# read-only. call setup() to change this attribute.
|
||||
|
||||
## @var model_min
|
||||
# (dict) low limits of the model parameters.
|
||||
# read-only. call setup() to change this attribute.
|
||||
|
||||
## @var model_max
|
||||
# (dict) high limits of the model parameters.
|
||||
# if min == max, the parameter is kept constant.
|
||||
# read-only. call setup() to change this attribute.
|
||||
|
||||
    ## @var model_step
    # (dict) step size (distance between two grid points) of each parameter.
    # read-only. call setup() to change this attribute.

    ## @var model_count
    # number of models (grid points).
    # initial value = 0.

    ## @var positions
    # (numpy.ndarray) flat list of grid coordinates and results.
    #
    # the column names include the names of the model parameters, taken from domain.start,
    # and the special names @c '_model', @c '_rfac'.
    # the special fields have the following meanings:
    #
    # * @c '_model': model number.
    # the model number identifies the grid point.
    # the field is used to associate the result of a calculation with the coordinate vector.
    # the model handlers use it to derive their model ID.
    #
    # * @c '_rfac': calculated R-factor for this position.
    # it is set by the add_result() method.
    #
    # @note if you read a single element, e.g. pos[0], from the array, you will get a numpy.void object.
    # this object is a <em>view</em> of the original array item.

    def __init__(self):
        """
        initialize the population object.

        """
        self.model_start = {}
        self.model_min = {}
        self.model_max = {}
        self.model_step = {}

        self.model_count = 0

        self.positions = None

        self.search_keys = []
        self.fixed_keys = []

    @staticmethod
    def get_model_dtype(model_params):
        """
        get numpy array data type for model parameters and grid control variables.

        @param model_params: dictionary of model parameters or list of parameter names.

        @return: dtype for use with numpy array constructors.
            this is a sorted list of (name, type) tuples.
        """
        dt = []
        for key in model_params:
            dt.append((key, 'f4'))
        dt.append(('_model', 'i4'))
        dt.append(('_rfac', 'f4'))
        dt.sort(key=lambda t: t[0].lower())
        return dt

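    # note: an illustrative example of the resulting dtype (hypothetical parameter
    # names). the sort is case-insensitive by field name, so the control fields
    # '_model' and '_rfac' come first for alphabetic parameter names:
    #
    #     >>> GridPopulation.get_model_dtype({'dAB': 2.3, 'theta': 0.0})
    #     [('_model', 'i4'), ('_rfac', 'f4'), ('dAB', 'f4'), ('theta', 'f4')]
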
    def setup(self, domain):
        """
        set up the population and result arrays.

        @param domain: definition of initial and limiting model parameters
            expected by the cluster and parameters functions.

        @param domain.start: values of the fixed parameters.

        @param domain.min: minimum values allowed.

        @param domain.max: maximum values allowed.
            if abs(max - min) < step/2, the parameter is kept constant.

        @param domain.step: step size (distance between two grid points).
            if step <= 0, the parameter is kept constant.
        """
        self.model_start = domain.start
        self.model_min = domain.min
        self.model_max = domain.max
        self.model_step = domain.step

        self.model_count = 1
        self.search_keys = []
        self.fixed_keys = []
        scales = []

        for p in domain.step.keys():
            if domain.step[p] > 0:
                n = np.round((domain.max[p] - domain.min[p]) / domain.step[p]) + 1
            else:
                n = 1
            if n > 1:
                self.search_keys.append(p)
                scales.append(np.linspace(domain.min[p], domain.max[p], n))
            else:
                self.fixed_keys.append(p)

        # scales is a list of 1D arrays that hold the coordinates of the individual dimensions
        # positions_nd is a list of N-D arrays that hold the coordinates of the full grid
        # positions_flat is a list of 1D arrays that hold the coordinates in flat sequence
        if len(scales) > 1:
            positions_nd = np.meshgrid(*scales, indexing='ij')
            positions_flat = [arr.flatten() for arr in positions_nd]
        else:
            positions_flat = scales
        self.model_count = positions_flat[0].shape[0]

        # shuffle the calculation order so that we may see the more interesting parts earlier
        shuffle_index = np.arange(self.model_count)
        np.random.shuffle(shuffle_index)
        positions_reordered = [pos[shuffle_index] for pos in positions_flat]

        dt = self.get_model_dtype(self.model_min)

        # positions
        self.positions = np.zeros(self.model_count, dtype=dt)

        for idx, key in enumerate(self.search_keys):
            self.positions[key] = positions_reordered[idx]
        for idx, key in enumerate(self.fixed_keys):
            self.positions[key] = self.model_start[key]

        self.positions['_model'] = np.arange(self.model_count)
        self.positions['_rfac'] = 2.1

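    # note: a minimal numpy sketch of the grid construction above, assuming two
    # scanned parameters (values are illustrative):
    #
    #     >>> import numpy as np
    #     >>> scales = [np.linspace(0.0, 1.0, 3), np.linspace(10.0, 20.0, 2)]
    #     >>> nd = np.meshgrid(*scales, indexing='ij')
    #     >>> [a.flatten() for a in nd]
    #     [array([ 0. ,  0. ,  0.5,  0.5,  1. ,  1. ]), array([ 10.,  20.,  10.,  20.,  10.,  20.])]
    #
    # shuffling one common index array then reorders all coordinate columns consistently.
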
    def add_result(self, particle, rfac):
        """
        add a calculation result to the results array.

        @param particle: dictionary of model parameters and particle values.
            the keys must correspond to the columns of the positions array,
            i.e. the names of the model parameters plus the '_rfac' and '_model' fields.

        @param rfac: calculated R-factor.
            the R-factor is written to the '_rfac' field.

        @return None
        """
        model = particle['_model']
        self.positions['_rfac'][model] = rfac

    def save_array(self, filename, array):
        """
        saves a population array to a text file.

        @param filename: name of the destination file.

        @param array: population array to save.
            must be self.positions.
"""
|
||||
header = " ".join(self.positions.dtype.names)
|
||||
np.savetxt(filename, array, fmt='%g', header=header)
|
||||
|
||||
def load_array(self, filename, array):
|
||||
"""
|
||||
load a population array from a text file.
|
||||
|
||||
the array to load must be compatible with the current population
|
||||
(same number of rows, same columns).
|
||||
the first row must contain column names.
|
||||
the ordering of columns may be different.
|
||||
the returned array is ordered according to the array argument.
|
||||
|
||||
        @param array: population array to load.
            must be self.positions.

        @return array with loaded data.
            this may be the same instance as on input.

        @raise AssertionError if the shape of the loaded data does not match the shape of the array argument.
"""
|
||||
data = np.genfromtxt(filename, names=True)
|
||||
assert data.shape == array.shape
|
||||
for name in data.dtype.names:
|
||||
array[name] = data[name]
|
||||
return array
|
||||
|
||||
def save_population(self, base_filename):
|
||||
"""
|
||||
        saves the population array to a text file.

        the file name extension is .pos.
"""
|
||||
self.save_array(base_filename + ".pos", self.positions)
|
||||
|
||||
def load_population(self, base_filename):
|
||||
"""
|
||||
        loads the population array from a previously saved text file.
        this can be used to continue an optimization job.

        the file name extension is .pos.
        the file must have the same format and the same number of rows
        as produced by save_population().
"""
|
||||
self.load_array(base_filename + ".pos", self.positions)
|
||||
|
||||
def save_results(self, filename):
|
||||
"""
|
||||
        saves the complete list of calculation results.
"""
|
||||
self.save_array(filename, self.positions)
|
||||
|
||||
|
||||
class GridSearchHandler(handlers.ModelHandler):
|
||||
"""
|
||||
model handler which implements the grid search algorithm.
|
||||
|
||||
"""
|
||||
|
||||
    ## @var _pop (GridPopulation)
    # holds the population object.

    ## @var _outfile (file)
    # output file for model parameters and R factor.
    # the file is open during calculations.
    # each calculation result adds one line.

    ## @var _model_time (timedelta)
    # estimated CPU time to calculate one model.
    # this value is the maximum time measured of the completed calculations.
    # it is used to determine when the optimization should be finished so that the time limit is not exceeded.

    ## @var _timeout (bool)
    # indicates when the handler has run out of time,
    # i.e. time is up before convergence has been reached.
    # if _timeout is True, create_tasks() will not create further tasks,
    # and add_result() will signal completion when the _pending_tasks queue becomes empty.

    def __init__(self):
        super(GridSearchHandler, self).__init__()
        self._pop = None
        self._outfile = None
        self._model_time = datetime.timedelta()
        self._timeout = False
        self._invalid_limit = 10
        self._next_model = 0

    def setup(self, project, slots):
        """
        initialize the grid population and open an output file.

        @param project:

        @param slots: number of calculation processes available through MPI.
            the minimum number of slots is 1, the recommended value is 10 or greater.
            in the grid search, the number of models is fixed by the parameter grid;
            the slots value is merely used to scale the limit on invalid results.

        @return:
        """
        super(GridSearchHandler, self).setup(project, slots)

        self._pop = GridPopulation()
        self._pop.setup(self._project.create_domain())
        self._invalid_limit = max(slots, self._invalid_limit)

        self._outfile = open(self._project.output_file + ".dat", "w")
        self._outfile.write("# ")
        self._outfile.write(" ".join(self._pop.positions.dtype.names))
        self._outfile.write("\n")

        return None

    def cleanup(self):
        self._outfile.close()
        super(GridSearchHandler, self).cleanup()

    def create_tasks(self, parent_task):
        """
        create a calculation task for the next pending grid point.

        this method generates at most one new task per call and
        steps through the grid until all points have been dispatched,
        the time limit is approached, or too many results were invalid.

        the process loop calls this method every time the length of the task queue drops
        below the number of calculation processes (slots).

        @return list of generated tasks. empty list if all grid points have been calculated.
        """

        super(GridSearchHandler, self).create_tasks(parent_task)

        # this is the top-level handler, we expect just one parent: root.
        parent_id = parent_task.id
        assert parent_id == (-1, -1, -1, -1, -1)
        self._parent_tasks[parent_id] = parent_task

        time_pending = self._model_time * len(self._pending_tasks)
        time_avail = (self.datetime_limit - datetime.datetime.now()) * max(self._slots, 1)

        out_tasks = []
        time_pending += self._model_time
        if time_pending > time_avail:
            self._timeout = True

        model = self._next_model
        if not self._timeout and model < self._pop.model_count and self._invalid_count < self._invalid_limit:
            new_task = parent_task.copy()
            new_task.parent_id = parent_id
            pos = self._pop.positions[model]
            new_task.model = {k: pos[k] for k in pos.dtype.names}
            new_task.change_id(model=model)

            child_id = new_task.id
            self._pending_tasks[child_id] = new_task
            out_tasks.append(new_task)
            self._next_model += 1

        return out_tasks

    def add_result(self, task):
        """
        calculate the R factor of the result and store it in the positions array.

        * append the result to the result output file.
        * update the execution time statistics.
        * remove temporary files if requested.
        * check whether the grid search is complete.

        @return parent task (CalculationTask) if the search is complete, @c None otherwise.
        """
        super(GridSearchHandler, self).add_result(task)

        self._complete_tasks[task.id] = task
        del self._pending_tasks[task.id]
        parent_task = self._parent_tasks[task.parent_id]

        rfac = 1.0
        if task.result_valid:
            try:
                rfac = self._project.calc_rfactor(task)
            except ValueError:
                task.result_valid = False
                self._invalid_count += 1
                logger.warning(BMsg("calculation of model {0} resulted in an undefined R-factor.", task.id.model))

        task.model['_rfac'] = rfac
        self._pop.add_result(task.model, rfac)

        if self._outfile:
            s = (str(task.model[name]) for name in self._pop.positions.dtype.names)
            self._outfile.write(" ".join(s))
            self._outfile.write("\n")
            self._outfile.flush()

        self._project.files.update_model_rfac(task.id.model, rfac)
        self._project.files.set_model_complete(task.id.model, True)

        if task.result_valid:
            if task.time > self._model_time:
                self._model_time = task.time

        # grid search complete?
        if len(self._pending_tasks) == 0:
            del self._parent_tasks[parent_task.id]
        else:
            parent_task = None

        self.cleanup_files()
        return parent_task
948
pmsco/handlers.py
Normal file
@@ -0,0 +1,948 @@
"""
|
||||
@package pmsco.handlers
|
||||
project-independent task handlers for models, scans, symmetries, emitters and energies.
|
||||
|
||||
calculation tasks are organized in a hierarchical tree.
|
||||
at each node, a task handler (feel free to find a better name)
|
||||
creates a set of child tasks according to the optimization mode and requirements of the project.
|
||||
at the end points of the tree, the tasks are ready to be sent to the calculation program.
the handlers collect the results, and return one combined dataset per node.
the passing of tasks and results between handlers is managed by the processing loop.

<em>model handlers</em> define the model parameters used in calculations.
the parameters can be chosen according to user input, or according to a structural optimization algorithm.
a model handler class derives from the ModelHandler class.
the simplest one, SingleModelHandler, is implemented in this module.
it calculates the diffraction pattern of a single model with the start parameters given in the domain object.
the handlers of the structural optimizers are declared in separate modules.

<em>scan handlers</em> split a task into one child task per scan file.
scans are defined by the project.
the actual merging step from multiple scans into one result dataset is delegated to the project class.

<em>symmetry handlers</em> split a task into one child per symmetry.
symmetries are defined by the project.
the actual merging step from multiple symmetries into one result dataset is delegated to the project class.

<em>emitter handlers</em> split a task into one child per emitter configuration (inequivalent sets of emitting atoms).
emitter configurations are defined by the project.
the merging of calculation results of emitter configurations is delegated to the project class.
since emitters contribute incoherently to the diffraction pattern,
it should make no difference how the emitters are grouped and calculated.
code inspection and tests have shown that per-emitter results from EDAC can be simply added.

<em>energy handlers</em> may split a calculation task into multiple tasks
in order to take advantage of parallel processing.

while several classes of model handlers are available,
the default handlers for scans, symmetries, emitters and energies should be sufficient in most situations.
the scan and symmetry handlers call methods of the project class to invoke project-specific functionality.

@author Matthias Muntwiler, matthias.muntwiler@psi.ch

@copyright (c) 2015-17 by Paul Scherrer Institut @n
Licensed under the Apache License, Version 2.0 (the "License"); @n
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
"""

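# note on the task tree (added for clarity): the hierarchy described above is
# encoded in the five-component task identifier (model, scan, symmetry, emitter,
# region), where -1 marks an undefined level. the numbers below are illustrative:
#
#     (-1, -1, -1, -1, -1)   root task
#     ( 3, -1, -1, -1, -1)   model 3, all scans combined
#     ( 3,  0, -1, -1, -1)   model 3, scan 0
#     ( 3,  0,  1, -1, -1)   model 3, scan 0, symmetry 1
#     ( 3,  0,  1,  0,  2)   model 3, scan 0, symmetry 1, emitter 0, region 2
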
from __future__ import division
import datetime
import os
import logging
import math
import numpy as np
import data as md
from helpers import BraceMessage as BMsg

logger = logging.getLogger(__name__)


class TaskHandler(object):
    """
    common ancestor for task handlers.

    this class defines the common interface of task handlers.
    """

    ## @var project
    # (Project) project instance.

    ## @var slots
    # (int) number of calculation slots (processes).
    #
    # for best efficiency the number of tasks generated should be greater or equal the number of slots.
    # it should not exceed N times the number of slots, where N is a reasonably small number.

    ## @var _pending_tasks
    # (dict) pending tasks by ID (created but not yet calculated).
    #
    # the dictionary keys are the task identifiers CalculationTask.id,
    # the values are the corresponding CalculationTask objects.

    ## @var _complete_tasks
    # (dict) complete tasks by ID (calculation finished, parent not yet complete).
    #
    # the dictionary keys are the task identifiers CalculationTask.id,
    # the values are the corresponding CalculationTask objects.

    ## @var _parent_tasks
    # (dict) pending parent tasks by ID.
    #
    # the dictionary keys are the task identifiers CalculationTask.id,
    # the values are the corresponding CalculationTask objects.

    ## @var _invalid_count (int)
    # accumulated total number of invalid results received.
    #
    # the number is incremented by add_result if an invalid task is reported.
    # the number can be used by descendants to terminate a hopeless calculation.

    def __init__(self):
        self._project = None
        self._slots = 0
        self._pending_tasks = {}
        self._parent_tasks = {}
        self._complete_tasks = {}
        self._invalid_count = 0

    def setup(self, project, slots):
        """
        initialize the handler with project data and the process environment.

        the method is called once by the dispatcher before the calculation loop starts.
        the handler can initialize internal variables which it hasn't done in the constructor.

        @param project (Project) project instance.

        @param slots (int) number of calculation slots (processes).
            for best efficiency the number of tasks generated should be greater or equal the number of slots.
            it should not exceed N times the number of slots, where N is a reasonably small number.

        @return None
        """
        self._project = project
        self._slots = slots

    def cleanup(self):
        """
        clean up whatever is necessary, e.g. close files.

        this method is called once after all calculations have finished.

        @return None
        """
        pass

    def create_tasks(self, parent_task):
        """
        create the next series of child tasks for the given parent task.

        the method is called by the dispatcher when a new series of tasks should be generated.

        when no more tasks are to be calculated, the method must return an empty list.
        processing will finish when all pending and running tasks are complete.

        @param parent_task (CalculationTask) task with initial model parameters.

        @return list of CalculationTask objects holding the parameters for the next calculations.
            the list must be empty if there are no more tasks.
        """

        return []

    def add_result(self, task):
        """
        collect and combine the results of tasks created by the same handler.

        this method collects the results of tasks that were created by self.create_tasks() and
        passes them on to the parent whenever a family (i.e. all tasks that have the same parent) is complete.
        when the family is complete, the method creates the data files that are represented by the parent task and
        signals to the caller that the parent task is complete.

        the method is called by the dispatcher whenever a calculation task belonging to this handler completes.

        as of this class, the method counts invalid results and
        adds the list of data files to the project's file tracker.
        collecting the tasks and combining their data must be implemented in sub-classes.

        @param task: (CalculationTask) calculation task that completed.

        @return parent task (CalculationTask) if the family is complete,
            None if the family is not complete yet.
            As of this class, the method returns None.
        """
        if not task.result_valid:
            self._invalid_count += 1

        self.track_files(task)

        return None

    def track_files(self, task):
        """
        register all task files with the file tracker of the project.

        @param task: CalculationTask object.
            the id, model, and files attributes are required.
            (R-factors are not registered here; they are reported to the file tracker
            separately via its update_model_rfac() method.)

        @return: None
        """
        model_id = task.id.model
        for path, cat in task.files.iteritems():
            self._project.files.add_file(path, model_id, category=cat)

    def cleanup_files(self, keep=10):
        """
        delete uninteresting files.

        @param keep: number of best ranking models to keep.

        @return: None
        """
        self._project.files.delete_files(keep_rfac=keep)


class ModelHandler(TaskHandler):
    """
    abstract model handler.

    structural optimizers must be derived from this class and implement a loop on the model.
    """

    ## @var datetime_limit (datetime.datetime)
    # date and time when the model handler should finish (regardless of result)
    # because the process may get killed by the scheduler after this time.
    #
    # the default is 100 days after creation of the handler.

    def __init__(self):
        super(ModelHandler, self).__init__()
        self.datetime_limit = datetime.datetime.now() + datetime.timedelta(days=100)

    def create_tasks(self, parent_task):
        """
        create tasks for the next population of models.

        the method is called repeatedly by the dispatcher when the calculation queue runs empty.
        the model should then create the next round of tasks, e.g. the next generation of a population.
        the number of tasks created can be as low as one.

        when no more tasks are to be calculated, the method must return an empty list.
        processing will finish when all pending and running tasks are complete.

        @note it is not possible to hold back calculations, or to wait for results.
        the handler must either return a task, or signal the end of the optimization process.

        @param parent_task (CalculationTask) task with initial model parameters.

        @return list of CalculationTask objects holding the parameters for the next calculations.
            the list must be empty if there are no more tasks.
        """
        super(ModelHandler, self).create_tasks(parent_task)

        return []

    def add_result(self, task):
        """
        collect and combine results of a scan.

        this method is called by the dispatcher when all results for a scan are available.
        """
        super(ModelHandler, self).add_result(task)

        return None


class SingleModelHandler(ModelHandler):
    """
    single model calculation handler.

    this class runs a single calculation on the start parameters defined in the domain of the project.
    """

    def create_tasks(self, parent_task):
        """
        start one task with the start parameters.

        subsequent calls will return an empty task list.

        @param parent_task (CalculationTask) task with initial model parameters.
        """
        super(SingleModelHandler, self).create_tasks(parent_task)

        out_tasks = []
        if len(self._complete_tasks) + len(self._pending_tasks) == 0:
            parent_id = parent_task.id
            self._parent_tasks[parent_id] = parent_task
            new_task = parent_task.copy()
            new_task.change_id(model=0)
            new_task.parent_id = parent_id
            child_id = new_task.id
            self._pending_tasks[child_id] = new_task
            out_tasks.append(new_task)

        return out_tasks

    def add_result(self, task):
        """
        collect the end result of a single calculation.

        the SingleModelHandler runs calculations for a single model.
        this method assumes that it will be called just once.
        it returns the parent task to signal the end of the calculations.

        the result file is not deleted regardless of the files_to_delete project option.
        the task ID is removed from the file name.

        @param task: (CalculationTask) calculation task that completed.

        @return (CalculationTask) parent task.

        """
        super(SingleModelHandler, self).add_result(task)

        self._complete_tasks[task.id] = task
        del self._pending_tasks[task.id]

        parent_task = self._parent_tasks[task.parent_id]
        del self._parent_tasks[task.parent_id]

        parent_task.result_valid = task.result_valid
        parent_task.file_ext = task.file_ext
        parent_task.result_filename = parent_task.file_root + parent_task.file_ext
        modf_ext = ".modf" + parent_task.file_ext
        parent_task.modf_filename = parent_task.file_root + modf_ext

        rfac = 1.0
        if task.result_valid:
            try:
                rfac = self._project.calc_rfactor(task)
            except ValueError:
                task.result_valid = False
                logger.warning(BMsg("calculation of model {0} resulted in an undefined R-factor.", task.id.model))

        task.model['_rfac'] = rfac
        self.save_report_file(task.model)

        self._project.files.update_model_rfac(task.id.model, rfac)
        self._project.files.set_model_complete(task.id.model, True)

        parent_task.time = task.time

        return parent_task

    def save_report_file(self, result):
        """
        save model parameters and r-factor to a file.

        the file name is derived from the project's output_file with '.dat' extension.
        the file has a space-separated column format.
        the first line contains the parameter names.
        this is the same format as used by the swarm and grid handlers.

        @param result: dictionary of results and parameters. the values should be scalars and strings.

        @return: None
        """
        keys = sorted(result.keys(), key=lambda s: s.lower())
        vals = (str(result[key]) for key in keys)
        with open(self._project.output_file + ".dat", "w") as outfile:
            outfile.write("# ")
            outfile.write(" ".join(keys))
            outfile.write("\n")
            outfile.write(" ".join(vals))
            outfile.write("\n")

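# note: illustrative content of the report file written by save_report_file(),
# with hypothetical parameter names; the actual columns depend on the model
# dictionary of the project:
#
#     # _rfac dAB theta
#     0.3521 2.34 12.5
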
class ScanHandler(TaskHandler):
    """
    split the parameters into one set per scan and gather the results.

    the scan selection takes effect in MscoProcess.calc().
    """

    ## @var _pending_ids_per_parent
    # (dict) sets of child task IDs per parent
    #
    # each dictionary element is a set of IDs referring to pending calculation tasks (children)
    # belonging to a parent task identified by the key.
    #
    # the dictionary keys are the task identifiers CalculationTask.id of the parent tasks,
    # the values are sets of all child CalculationTask.id belonging to the parent.

    ## @var _complete_ids_per_parent
    # (dict) sets of child task IDs per parent
    #
    # each dictionary element is a set of complete calculation tasks (children)
    # belonging to a parent task identified by the key.
    #
    # the dictionary keys are the task identifiers CalculationTask.id of the parent tasks,
    # the values are sets of all child CalculationTask.id belonging to the parent.

    def __init__(self):
        super(ScanHandler, self).__init__()
        self._pending_ids_per_parent = {}
        self._complete_ids_per_parent = {}

    def create_tasks(self, parent_task):
        """
        generate a calculation task for each scan of the given parent task.

        all scans share the model parameters.

        @return list of CalculationTask objects, with one element per scan.
            the scan index varies according to project.scans.
        """
        super(ScanHandler, self).create_tasks(parent_task)

        parent_id = parent_task.id
        self._parent_tasks[parent_id] = parent_task
        assert parent_id not in self._pending_ids_per_parent
        self._pending_ids_per_parent[parent_id] = set()
        self._complete_ids_per_parent[parent_id] = set()

        out_tasks = []
        for (i_scan, scan) in enumerate(self._project.scans):
            new_task = parent_task.copy()
            new_task.parent_id = parent_id
            new_task.change_id(scan=i_scan)

            child_id = new_task.id
            self._pending_tasks[child_id] = new_task
            self._pending_ids_per_parent[parent_id].add(child_id)

            out_tasks.append(new_task)

        if not out_tasks:
            logger.error("no scan tasks generated. your project must link to at least one scan file.")

        return out_tasks

    def add_result(self, task):
        """
        collect and combine the calculation results versus scan.

        * mark the task as complete
        * store its result for later
        * check whether this was the last pending task of the family (belonging to the same parent).

        the actual merging of data is delegated to the project's combine_scans() method.

        @param task: (CalculationTask) calculation task that completed.

        @return parent task (CalculationTask) if the family is complete. None if the family is not complete yet.
        """
        super(ScanHandler, self).add_result(task)

        self._complete_tasks[task.id] = task
        del self._pending_tasks[task.id]

        family_pending = self._pending_ids_per_parent[task.parent_id]
        family_complete = self._complete_ids_per_parent[task.parent_id]
        family_pending.remove(task.id)
        family_complete.add(task.id)

        # all scans complete?
        if len(family_pending) == 0:
            parent_task = self._parent_tasks[task.parent_id]

            parent_task.file_ext = task.file_ext
            parent_task.result_filename = parent_task.format_filename()
            modf_ext = ".modf" + parent_task.file_ext
            parent_task.modf_filename = parent_task.format_filename(ext=modf_ext)

            child_tasks = [self._complete_tasks[task_id] for task_id in sorted(family_complete)]

            child_valid = [t.result_valid for t in child_tasks]
            parent_task.result_valid = reduce(lambda a, b: a and b, child_valid)
            child_times = [t.time for t in child_tasks]
            parent_task.time = reduce(lambda a, b: a + b, child_times)

            if parent_task.result_valid:
                self._project.combine_scans(parent_task, child_tasks)
                self._project.files.add_file(parent_task.result_filename, parent_task.id.model, 'model')
                self._project.files.add_file(parent_task.modf_filename, parent_task.id.model, 'model')

            del self._pending_ids_per_parent[parent_task.id]
            del self._complete_ids_per_parent[parent_task.id]
            del self._parent_tasks[parent_task.id]

            return parent_task
        else:
            return None


class SymmetryHandler(TaskHandler):
    ## @var _pending_ids_per_parent
    # (dict) sets of child task IDs per parent
    #
    # each dictionary element is a set of IDs referring to pending calculation tasks (children)
    # belonging to a parent task identified by the key.
    #
    # the dictionary keys are the task identifiers CalculationTask.id of the parent tasks,
    # the values are sets of all child CalculationTask.id belonging to the parent.

    ## @var _complete_ids_per_parent
    # (dict) sets of child task IDs per parent
    #
    # each dictionary element is a set of complete calculation tasks (children)
    # belonging to a parent task identified by the key.
    #
    # the dictionary keys are the task identifiers CalculationTask.id of the parent tasks,
    # the values are sets of all child CalculationTask.id belonging to the parent.

    def __init__(self):
        super(SymmetryHandler, self).__init__()
        self._pending_ids_per_parent = {}
        self._complete_ids_per_parent = {}

    def create_tasks(self, parent_task):
        """
        generate a calculation task for each symmetry of the given parent task.

        all symmetries share the same model parameters.

        @return list of CalculationTask objects, with one element per symmetry.
            the symmetry index varies according to project.symmetries.
        """
        super(SymmetryHandler, self).create_tasks(parent_task)
|
||||
|
||||
parent_id = parent_task.id
|
||||
self._parent_tasks[parent_id] = parent_task
|
||||
self._pending_ids_per_parent[parent_id] = set()
|
||||
self._complete_ids_per_parent[parent_id] = set()
|
||||
|
||||
out_tasks = []
|
||||
for (i_sym, sym) in enumerate(self._project.symmetries):
|
||||
new_task = parent_task.copy()
|
||||
new_task.parent_id = parent_id
|
||||
new_task.change_id(sym=i_sym)
|
||||
|
||||
child_id = new_task.id
|
||||
self._pending_tasks[child_id] = new_task
|
||||
self._pending_ids_per_parent[parent_id].add(child_id)
|
||||
|
||||
out_tasks.append(new_task)
|
||||
|
||||
if not out_tasks:
|
||||
logger.error("no symmetry tasks generated. your project must declare at least one symmetry.")
|
||||
|
||||
return out_tasks
|
||||
|
||||
def add_result(self, task):
|
||||
"""
|
||||
collect and combine the calculation results versus symmetry.
|
||||
|
||||
* mark the task as complete
|
||||
* store its result for later
|
||||
* check whether this was the last pending task of the family (belonging to the same parent).
|
||||
|
||||
the actual merging of data is delegated to the project's combine_symmetries() method.
|
||||
|
||||
@param task: (CalculationTask) calculation task that completed.
|
||||
|
||||
@return parent task (CalculationTask) if the family is complete. None if the family is not complete yet.
|
||||
"""
|
||||
super(SymmetryHandler, self).add_result(task)
|
||||
|
||||
self._complete_tasks[task.id] = task
|
||||
del self._pending_tasks[task.id]
|
||||
|
||||
family_pending = self._pending_ids_per_parent[task.parent_id]
|
||||
family_complete = self._complete_ids_per_parent[task.parent_id]
|
||||
family_pending.remove(task.id)
|
||||
family_complete.add(task.id)
|
||||
|
||||
# all symmetries complete?
|
||||
if len(family_pending) == 0:
|
||||
parent_task = self._parent_tasks[task.parent_id]
|
||||
|
||||
parent_task.file_ext = task.file_ext
|
||||
parent_task.result_filename = parent_task.format_filename()
|
||||
modf_ext = ".modf" + parent_task.file_ext
|
||||
parent_task.modf_filename = parent_task.format_filename(ext=modf_ext)
|
||||
|
||||
child_tasks = [self._complete_tasks[task_id] for task_id in sorted(family_complete)]
|
||||
|
||||
child_valid = [t.result_valid for t in child_tasks]
|
||||
parent_task.result_valid = reduce(lambda a, b: a and b, child_valid)
|
||||
child_times = [t.time for t in child_tasks]
|
||||
parent_task.time = reduce(lambda a, b: a + b, child_times)
|
||||
|
||||
if parent_task.result_valid:
|
||||
self._project.combine_symmetries(parent_task, child_tasks)
|
||||
self._project.files.add_file(parent_task.result_filename, parent_task.id.model, 'scan')
|
||||
self._project.files.add_file(parent_task.modf_filename, parent_task.id.model, 'scan')
|
||||
|
||||
del self._pending_ids_per_parent[parent_task.id]
|
||||
del self._complete_ids_per_parent[parent_task.id]
|
||||
del self._parent_tasks[parent_task.id]
|
||||
|
||||
return parent_task
|
||||
else:
|
||||
return None
|
||||
|
||||
|
||||
class EmitterHandler(TaskHandler):
|
||||
"""
|
||||
the emitter handler distributes emitter configurations to calculation tasks and collects their results.
|
||||
|
||||
"""
|
||||
## @var _pending_ids_per_parent
|
||||
# (dict) sets of child task IDs per parent
|
||||
#
|
||||
# each dictionary element is a set of IDs referring to pending calculation tasks (children)
|
||||
# belonging to a parent task identified by the key.
|
||||
#
|
||||
# the dictionary keys are the task identifiers CalculationTask.id of the parent tasks,
|
||||
# the values are sets of all child CalculationTask.id belonging to the parent.
|
||||
|
||||
## @var _complete_ids_per_parent
|
||||
# (dict) sets of child task IDs per parent
|
||||
#
|
||||
# each dictionary element is a set of complete calculation tasks (children)
|
||||
# belonging to a parent task identified by the key.
|
||||
#
|
||||
# the dictionary keys are the task identifiers CalculationTask.id of the parent tasks,
|
||||
# the values are sets of all child CalculationTask.id belonging to the parent.
|
||||
|
||||
def __init__(self):
|
||||
super(EmitterHandler, self).__init__()
|
||||
self._pending_ids_per_parent = {}
|
||||
self._complete_ids_per_parent = {}
|
||||
|
||||
def create_tasks(self, parent_task):
|
||||
"""
|
||||
generate a calculation task for each emitter configuration of the given parent task.
|
||||
|
||||
all emitters share the same model parameters.
|
||||
|
||||
@return list of @ref CalculationTask objects with one element per emitter configuration
|
||||
if parallel processing is enabled.
|
||||
otherwise the list contains a single CalculationTask object with emitter index 0.
|
||||
the emitter index is used by the project's create_cluster method.
|
||||
"""
|
||||
super(EmitterHandler, self).create_tasks(parent_task)
|
||||
|
||||
parent_id = parent_task.id
|
||||
self._parent_tasks[parent_id] = parent_task
|
||||
self._pending_ids_per_parent[parent_id] = set()
|
||||
self._complete_ids_per_parent[parent_id] = set()
|
||||
|
||||
n_emitters = self._project.cluster_generator.count_emitters(parent_task.model, parent_task.id)
|
||||
if n_emitters > 1 and self._slots > 1:
|
||||
emitters = range(1, n_emitters + 1)
|
||||
else:
|
||||
emitters = [0]
|
||||
|
||||
out_tasks = []
|
||||
for em in emitters:
|
||||
new_task = parent_task.copy()
|
||||
new_task.parent_id = parent_id
|
||||
new_task.change_id(emit=em)
|
||||
|
||||
child_id = new_task.id
|
||||
self._pending_tasks[child_id] = new_task
|
||||
self._pending_ids_per_parent[parent_id].add(child_id)
|
||||
|
||||
out_tasks.append(new_task)
|
||||
|
||||
if not out_tasks:
|
||||
logger.error("no emitter tasks generated. your project must declare at least one emitter configuration.")
|
||||
|
||||
return out_tasks
|
||||
|
||||
def add_result(self, task):
|
||||
"""
|
||||
collect and combine the calculation results of inequivalent emitters.
|
||||
|
||||
* mark the task as complete
|
||||
* store its result for later
|
||||
* check whether this was the last pending task of the family (belonging to the same parent).
|
||||
|
||||
the actual merging of data is delegated to the project's combine_emitters() method.
|
||||
|
||||
@param task: (CalculationTask) calculation task that completed.
|
||||
|
||||
@return parent task (CalculationTask) if the family is complete. None if the family is not complete yet.
|
||||
"""
|
||||
super(EmitterHandler, self).add_result(task)
|
||||
|
||||
self._complete_tasks[task.id] = task
|
||||
del self._pending_tasks[task.id]
|
||||
|
||||
family_pending = self._pending_ids_per_parent[task.parent_id]
|
||||
family_complete = self._complete_ids_per_parent[task.parent_id]
|
||||
family_pending.remove(task.id)
|
||||
family_complete.add(task.id)
|
||||
|
||||
# all emitters complete?
|
||||
if len(family_pending) == 0:
|
||||
parent_task = self._parent_tasks[task.parent_id]
|
||||
|
||||
parent_task.file_ext = task.file_ext
|
||||
parent_task.result_filename = parent_task.format_filename()
|
||||
modf_ext = ".modf" + parent_task.file_ext
|
||||
parent_task.modf_filename = parent_task.format_filename(ext=modf_ext)
|
||||
|
||||
child_tasks = [self._complete_tasks[task_id] for task_id in sorted(family_complete)]
|
||||
|
||||
child_valid = [t.result_valid for t in child_tasks]
|
||||
parent_task.result_valid = reduce(lambda a, b: a and b, child_valid)
|
||||
child_times = [t.time for t in child_tasks]
|
||||
parent_task.time = reduce(lambda a, b: a + b, child_times)
|
||||
|
||||
if parent_task.result_valid:
|
||||
self._project.combine_emitters(parent_task, child_tasks)
|
||||
self._project.files.add_file(parent_task.result_filename, parent_task.id.model, 'symmetry')
|
||||
self._project.files.add_file(parent_task.modf_filename, parent_task.id.model, 'symmetry')
|
||||
|
||||
del self._pending_ids_per_parent[parent_task.id]
|
||||
del self._complete_ids_per_parent[parent_task.id]
|
||||
del self._parent_tasks[parent_task.id]
|
||||
|
||||
return parent_task
|
||||
else:
|
||||
return None
|
||||
|
||||
|
||||
class RegionHandler(TaskHandler):
|
||||
"""
|
||||
region handlers split a scan into a number of regions that can be calculated in parallel.
|
||||
|
||||
this class is an abstract base class.
|
||||
it implements only common code to combine different regions into one result.
|
||||
"""
|
||||
|
||||
## @var _pending_ids_per_parent
|
||||
# (dict) sets of child task IDs per parent
|
||||
#
|
||||
# each dictionary element is a set of IDs referring to pending calculation tasks (children)
|
||||
# belonging to a parent task identified by the key.
|
||||
#
|
||||
# the dictionary keys are the task identifiers CalculationTask.id of the parent tasks,
|
||||
# the values are sets of all child CalculationTask.id belonging to the parent.
|
||||
|
||||
## @var _complete_ids_per_parent
|
||||
# (dict) sets of child task IDs per parent
|
||||
#
|
||||
# each dictionary element is a set of complete calculation tasks (children)
|
||||
# belonging to a parent task identified by the key.
|
||||
#
|
||||
# the dictionary keys are the task identifiers CalculationTask.id of the parent tasks,
|
||||
# the values are sets of all child CalculationTask.id belonging to the parent.
|
||||
|
||||
def __init__(self):
|
||||
super(RegionHandler, self).__init__()
|
||||
self._pending_ids_per_parent = {}
|
||||
self._complete_ids_per_parent = {}
|
||||
|
||||
def add_result(self, task):
|
||||
"""
|
||||
gather results of all regions that belong to the same parent.
|
||||
|
||||
@param task: (CalculationTask) calculation task that completed.
|
||||
|
||||
@return parent task (CalculationTask) if the family is complete. None if the family is not complete yet.
|
||||
"""
|
||||
super(RegionHandler, self).add_result(task)
|
||||
|
||||
self._complete_tasks[task.id] = task
|
||||
del self._pending_tasks[task.id]
|
||||
|
||||
family_pending = self._pending_ids_per_parent[task.parent_id]
|
||||
family_complete = self._complete_ids_per_parent[task.parent_id]
|
||||
family_pending.remove(task.id)
|
||||
family_complete.add(task.id)
|
||||
|
||||
# all regions ready?
|
||||
if len(family_pending) == 0:
|
||||
parent_task = self._parent_tasks[task.parent_id]
|
||||
|
||||
parent_task.file_ext = task.file_ext
|
||||
parent_task.result_filename = parent_task.format_filename()
|
||||
modf_ext = ".modf" + parent_task.file_ext
|
||||
parent_task.modf_filename = parent_task.format_filename(ext=modf_ext)
|
||||
|
||||
child_tasks = [self._complete_tasks[task_id] for task_id in sorted(family_complete)]
|
||||
|
||||
child_valid = [t.result_valid for t in child_tasks]
|
||||
parent_task.result_valid = reduce(lambda a, b: a and b, child_valid)
|
||||
child_times = [t.time for t in child_tasks]
|
||||
parent_task.time = reduce(lambda a, b: a + b, child_times)
|
||||
|
||||
if parent_task.result_valid:
|
||||
stack1 = [md.load_data(t.result_filename) for t in child_tasks]
|
||||
dtype = md.common_dtype(stack1)
|
||||
stack2 = [md.restructure_data(d, dtype) for d in stack1]
|
||||
result_data = np.hstack(tuple(stack2))
|
||||
md.sort_data(result_data)
|
||||
md.save_data(parent_task.result_filename, result_data)
|
||||
self._project.files.add_file(parent_task.result_filename, parent_task.id.model, "emitter")
|
||||
for t in child_tasks:
|
||||
self._project.files.remove_file(t.result_filename)
|
||||
|
||||
del self._pending_ids_per_parent[parent_task.id]
|
||||
del self._complete_ids_per_parent[parent_task.id]
|
||||
del self._parent_tasks[parent_task.id]
|
||||
|
||||
return parent_task
|
||||
else:
|
||||
return None
|
||||
|
||||
|
||||
class SingleRegionHandler(RegionHandler):
|
||||
"""
|
||||
trivial region handler
|
||||
|
||||
this is a trivial region handler.
|
||||
the whole parent task is identified as one region and calculated at once.
|
||||
"""
|
||||
|
||||
def create_tasks(self, parent_task):
|
||||
"""
|
||||
generate one calculation task for the parent task.
|
||||
|
||||
@return list of CalculationTask objects, with one element per region.
|
||||
the energy index enumerates the regions.
|
||||
"""
|
||||
super(SingleRegionHandler, self).create_tasks(parent_task)
|
||||
|
||||
parent_id = parent_task.id
|
||||
self._parent_tasks[parent_id] = parent_task
|
||||
self._pending_ids_per_parent[parent_id] = set()
|
||||
self._complete_ids_per_parent[parent_id] = set()
|
||||
|
||||
new_task = parent_task.copy()
|
||||
new_task.parent_id = parent_id
|
||||
new_task.change_id(region=0)
|
||||
|
||||
child_id = new_task.id
|
||||
self._pending_tasks[child_id] = new_task
|
||||
self._pending_ids_per_parent[parent_id].add(child_id)
|
||||
|
||||
out_tasks = [new_task]
|
||||
return out_tasks
|
||||
|
||||
|
||||
class EnergyRegionHandler(RegionHandler):
|
||||
"""
|
||||
split a scan into a number of energy regions that can be run in parallel.
|
||||
|
||||
the purpose of this task handler is to save wall clock time on a multi-processor machine
|
||||
by splitting energy scans into smaller chunks.
|
||||
|
||||
the handler distributes the processing slots to the scans proportional to their scan lengths
|
||||
so that all child tasks of the same parent finish approximately in the same time.
|
||||
pure angle scans are not split.
|
||||
|
||||
to use this feature, the project assigns this class to its @ref handler_classes['region'].
|
||||
it is safe to use this handler for calculations that do not involve energy scans.
|
||||
the handler is best used for single calculations.
|
||||
in optimizations that calculate many models there is no advantage in using it
|
||||
(on the contrary, the overhead increases the total run time slightly.)
|
||||
"""
|
||||
|
||||
## @var _slots_per_scan
|
||||
# (list of integers) number of processor slots assigned to each scan,
|
||||
# i.e. number of chunks to split a scan region into.
|
||||
#
|
||||
# the sequence has the same order as self._project.scans.
|
||||
|
||||
def __init__(self):
|
||||
super(EnergyRegionHandler, self).__init__()
|
||||
self._slots_per_scan = []
|
||||
|
||||
def setup(self, project, slots):
|
||||
"""
|
||||
initialize the handler with project data and the process environment.
|
||||
|
||||
this function distributes the processing slots to the scans.
|
||||
the slots are distributed proportional to the scan lengths of the energy scans
|
||||
so that all chunks have approximately the same size.
|
||||
|
||||
the number of slots per scan is stored in @ref _slots_per_scan for later use by @ref create_tasks.
|
||||
|
||||
@param project (Project) project instance.
|
||||
|
||||
@param slots (int) number of calculation slots (processes).
|
||||
|
||||
@return None
|
||||
"""
|
||||
super(EnergyRegionHandler, self).setup(project, slots)
|
||||
|
||||
scan_lengths = [scan.energies.shape[0] for scan in self._project.scans]
|
||||
total_length = sum(scan_lengths)
|
||||
f = min(1.0, float(self._slots) / total_length)
|
||||
self._slots_per_scan = [max(1, int(round(l * f))) for l in scan_lengths]
|
||||
|
||||
for i, scan in enumerate(self._project.scans):
|
||||
logger.debug(BMsg("region handler: split scan {file} into {slots} chunks",
|
||||
file=os.path.basename(scan.filename), slots=self._slots_per_scan[i]))
|
||||
|
||||
def create_tasks(self, parent_task):
|
||||
"""
|
||||
generate a calculation task for each energy region of the given parent task.
|
||||
|
||||
all child tasks share the model parameters.
|
||||
|
||||
@return list of CalculationTask objects, with one element per region.
|
||||
the energy index enumerates the regions.
|
||||
"""
|
||||
super(EnergyRegionHandler, self).create_tasks(parent_task)
|
||||
|
||||
parent_id = parent_task.id
|
||||
self._parent_tasks[parent_id] = parent_task
|
||||
self._pending_ids_per_parent[parent_id] = set()
|
||||
self._complete_ids_per_parent[parent_id] = set()
|
||||
|
||||
energies = self._project.scans[parent_id.scan].energies
|
||||
n_regions = self._slots_per_scan[parent_id.scan]
|
||||
regions = np.array_split(energies, n_regions)
|
||||
|
||||
out_tasks = []
|
||||
for ireg, reg in enumerate(regions):
|
||||
new_task = parent_task.copy()
|
||||
new_task.parent_id = parent_id
|
||||
new_task.change_id(region=ireg)
|
||||
if n_regions > 1:
|
||||
new_task.region['e'] = reg
|
||||
|
||||
child_id = new_task.id
|
||||
self._pending_tasks[child_id] = new_task
|
||||
self._pending_ids_per_parent[parent_id].add(child_id)
|
||||
|
||||
out_tasks.append(new_task)
|
||||
|
||||
if not out_tasks:
|
||||
logger.error("no region tasks generated. this is probably a bug.")
|
||||
|
||||
return out_tasks
|
||||
|
||||
|
||||
def choose_region_handler_class(project):
|
||||
"""
|
||||
choose a suitable region handler for the project.
|
||||
|
||||
the function returns the EnergyRegionHandler class
|
||||
if the project includes an energy scan with at least 10 steps.
|
||||
Otherwise, it returns the SingleRegionHandler.
|
||||
|
||||
angle scans do not benefit from region splitting in EDAC.
|
||||
|
||||
@param project: Project instance.
|
||||
@return: SingleRegionHandler or EnergyRegionHandler class.
|
||||
"""
|
||||
energy_scans = 0
|
||||
for scan in project.scans:
|
||||
if scan.energies.shape[0] >= 10:
|
||||
energy_scans += 1
|
||||
|
||||
if energy_scans >= 1:
|
||||
return EnergyRegionHandler
|
||||
else:
|
||||
return SingleRegionHandler
|
||||
8
pmsco/helpers.py
Normal file
8
pmsco/helpers.py
Normal file
@@ -0,0 +1,8 @@
|
||||
class BraceMessage:
|
||||
def __init__(self, fmt, *args, **kwargs):
|
||||
self.fmt = fmt
|
||||
self.args = args
|
||||
self.kwargs = kwargs
|
||||
|
||||
def __str__(self):
|
||||
return self.fmt.format(*self.args, **self.kwargs)
|
||||
2
pmsco/loess/.gitignore
vendored
Normal file
2
pmsco/loess/.gitignore
vendored
Normal file
@@ -0,0 +1,2 @@
|
||||
loess.py
|
||||
loess_wrap.c
|
||||
115
pmsco/loess/README
Normal file
115
pmsco/loess/README
Normal file
@@ -0,0 +1,115 @@
|
||||
Software for Locally-Weighted Regression 18 August 1992
|
||||
|
||||
William S. Cleveland
|
||||
Eric Grosse
|
||||
Ming-Jen Shyu
|
||||
|
||||
Locally-weighted regression, or loess, is a procedure for estimating a
|
||||
regression surface by a multivariate smoothing procedure: fitting a
|
||||
linear or quadratic function of the independent variables in a moving
|
||||
fashion that is analogous to how a moving average is computed for a
|
||||
time series. Compared to classical approaches - fitting global
|
||||
parametric functions - loess substantially increases the domain of
|
||||
surfaces that can be estimated without distortion. Also, a pleasant
|
||||
fact about loess is that analogues of the statistical procedures used
|
||||
in parametric function fitting - for example, ANOVA and t intervals -
|
||||
involve statistics whose distributions are well approximated by
|
||||
familiar distributions.
|
||||
|
||||
The follwing files are included in this distribution.
|
||||
README the instruction file you are reading now
|
||||
S.h header file
|
||||
air.c C source for air data example
|
||||
changes history of changes to loess
|
||||
depend.ps PostScript figure of how routines are related
|
||||
ethanol.c C source for ethanol data example
|
||||
galaxy.c C source for galaxy data example
|
||||
gas.c C source for gas data example
|
||||
loess.c C source (high-level loess routines)
|
||||
loess.h header file for loess_struct and predict_struct
|
||||
loess.m manual page for user-callable loess routines
|
||||
loessc.c C source (low-level loess routines)
|
||||
loessf.f FORTRAN source (low-level loess & predict routines)
|
||||
loessf.m documentation for FORTRAN source
|
||||
madeup.c C source for madeup data example
|
||||
makefile makefile to compile the example codes
|
||||
misc.c C source (anova, pointwise, and other support routines)
|
||||
predict.c C source (high-level predict routines)
|
||||
predict.m manual page for user-callable predict routines
|
||||
struct.m manual page for loess_struct, pred_struct
|
||||
supp.f supplemental Fortran loess drivers
|
||||
|
||||
After unpacking these files, just type "make" and if all goes well
|
||||
you should see output like:
|
||||
|
||||
loess(&gas):
|
||||
Number of Observations: 22
|
||||
Equivalent Number of Parameters: 5.5
|
||||
Residual Standard Error: 0.3404
|
||||
|
||||
loess(&gas_null):
|
||||
Number of Observations: 22
|
||||
Equivalent Number of Parameters: 3.5
|
||||
Residual Standard Error: 0.5197
|
||||
|
||||
predict(gas_fit_E, m, &gas, &gas_pred):
|
||||
1.19641 5.06875 0.523682
|
||||
|
||||
pointwise(&gas_pred, m, coverage, &gas_ci):
|
||||
1.98562 4.10981 5.48023 5.56651 3.52761 1.71062 1.47205
|
||||
1.19641 3.6795 5.05571 5.13526 3.14366 1.19693 0.523682
|
||||
0.407208 3.24919 4.63119 4.70401 2.7597 0.683247 -0.424684
|
||||
|
||||
anova(&gas_null, &gas, &gas_anova):
|
||||
2.5531 15.663 10.1397 0.000860102
|
||||
|
||||
To run other examples, simply type "make galaxy", or "make ethanol", etc.
|
||||
|
||||
If your loader complains about "-llinpack -lblas" in the makefile, change
|
||||
it to whatever your system prefers for accessing Linpack and the Blas.
|
||||
If necessary, these Fortran subroutines can be obtained by
|
||||
mail netlib@netlib.bell-labs.com
|
||||
send dnrm2 dsvdc dqrdc ddot dqrsl idamax from linpack core.
|
||||
|
||||
A 50 page user guide, in PostScript form, is available by anonymous ftp.
|
||||
ftp netlib.bell-labs.com
|
||||
login: anonymous
|
||||
password: <your email address>
|
||||
binary
|
||||
cd /netlib/a
|
||||
get cloess.ps.Z
|
||||
quit
|
||||
uncompress cloess.ps
|
||||
This guide describes crucial steps in the proper analysis of data using
|
||||
loess. Please read it.
|
||||
|
||||
Bug reports are appreciated. Send electronic mail to
|
||||
ehg@netlib.bell-labs.com
|
||||
including the words "this is not spam" in the Subject line
|
||||
or send paper mail to
|
||||
Eric Grosse
|
||||
Bell Labs 2T-502
|
||||
Murray Hill NJ 07974
|
||||
for problems with the Fortran inner core of the algorithm.
|
||||
The C drivers were written by Ming-Jen Shyu, who left Bell Labs. Eric will
|
||||
fix problems with them when he can.
|
||||
|
||||
Remember that this is experimental software distributed free of charge
|
||||
and comes with no warranty! Exercise professional caution.
|
||||
|
||||
Happy Smoothing!
|
||||
|
||||
/*
|
||||
* The authors of this software are Cleveland, Grosse, and Shyu.
|
||||
* Copyright (c) 1989, 1992 by AT&T.
|
||||
* Permission to use, copy, modify, and distribute this software for any
|
||||
* purpose without fee is hereby granted, provided that this entire notice
|
||||
* is included in all copies of any software which is or includes a copy
|
||||
* or modification of this software and in all copies of the supporting
|
||||
* documentation for such software.
|
||||
* THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED
|
||||
* WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR AT&T MAKE ANY
|
||||
* REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
|
||||
* OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.
|
||||
*/
|
||||
|
||||
31
pmsco/loess/S.h
Normal file
31
pmsco/loess/S.h
Normal file
@@ -0,0 +1,31 @@
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <math.h>
|
||||
#include <string.h>
|
||||
|
||||
#define Calloc(n,t) (t *)calloc((unsigned)(n),sizeof(t))
|
||||
#define Free(p) free((char *)(p))
|
||||
|
||||
/* the mapping from f77 to C intermediate code -- may be machine dependent
|
||||
* the first definition satisfies lint's narrowminded preprocessing & should
|
||||
* stay the same for all implementations. The __STDC__ definition is for
|
||||
* ANSI standard conforming C compilers. The #else definition should
|
||||
* generate the version of the fortran subroutine & common block names x
|
||||
* handed to the local loader; e.g., "x_" in system V, Berkeley & 9th edition
|
||||
*/
|
||||
|
||||
#ifdef lint
|
||||
#define F77_SUB(x) x
|
||||
#define F77_COM(x) x
|
||||
#else
|
||||
#ifdef __STDC__
|
||||
#define F77_SUB(x) x##_
|
||||
#define F77_COM(x) x##_
|
||||
#else
|
||||
#define F77_SUB(x) x/**/_
|
||||
#define F77_COM(x) x/**/_
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#define NULL_ENTRY ((int *)NULL)
|
||||
|
||||
1
pmsco/loess/__init__.py
Normal file
1
pmsco/loess/__init__.py
Normal file
@@ -0,0 +1 @@
|
||||
__author__ = 'matthias muntwiler'
|
||||
78
pmsco/loess/air.c
Normal file
78
pmsco/loess/air.c
Normal file
@@ -0,0 +1,78 @@
|
||||
#include <stdio.h>
|
||||
#include "loess.h"
|
||||
|
||||
struct loess_struct air;
|
||||
double ozone[] = {3.44821724038273, 3.30192724889463, 2.28942848510666,
|
||||
2.6207413942089, 2.84386697985157, 2.66840164872194, 2,
|
||||
2.51984209978975, 2.22398009056931, 2.41014226417523,
|
||||
2.6207413942089, 2.41014226417523, 3.23961180127748,
|
||||
1.81712059283214, 3.10723250595386, 2.22398009056931, 1,
|
||||
2.22398009056931, 1.5874010519682, 3.1748021039364,
|
||||
2.84386697985157, 3.55689330449006, 4.86294413109428,
|
||||
3.33222185164595, 3.07231682568585, 4.14081774942285,
|
||||
3.39121144301417, 2.84386697985157, 2.75892417638112,
|
||||
3.33222185164595, 2.71441761659491, 2.28942848510666,
|
||||
2.35133468772076, 5.12992784003009, 3.65930571002297,
|
||||
3.1748021039364, 4, 3.41995189335339, 4.25432086511501,
|
||||
4.59470089220704, 4.59470089220704, 4.39682967215818,
|
||||
2.15443469003188, 3, 1.91293118277239, 3.63424118566428,
|
||||
3.27106631018859, 3.93649718310217, 4.29084042702621,
|
||||
3.97905720789639, 2.51984209978975, 4.30886938006377,
|
||||
4.7622031559046, 2.71441761659491, 3.73251115681725,
|
||||
4.34448148576861, 3.68403149864039, 4, 3.89299641587326,
|
||||
3.39121144301417, 2.0800838230519, 2.51984209978975,
|
||||
4.9596756638423, 4.46474509558454, 4.79141985706278,
|
||||
3.53034833532606, 3.03658897187566, 4.02072575858906,
|
||||
2.80203933065539, 3.89299641587326, 2.84386697985157,
|
||||
3.14138065239139, 3.53034833532606, 2.75892417638112,
|
||||
2.0800838230519, 3.55689330449006, 5.51784835276224,
|
||||
4.17933919638123, 4.23582358425489, 4.90486813152402,
|
||||
4.37951913988789, 4.39682967215818, 4.57885697021333,
|
||||
4.27265868169792, 4.17933919638123, 4.49794144527541,
|
||||
3.60882608013869, 3.1748021039364, 2.71441761659491,
|
||||
2.84386697985157, 2.75892417638112, 2.88449914061482,
|
||||
3.53034833532606, 2.75892417638112, 3.03658897187566,
|
||||
2.0800838230519, 2.35133468772076, 3.58304787101595,
|
||||
2.6207413942089, 2.35133468772076, 2.88449914061482,
|
||||
2.51984209978975, 2.35133468772076, 2.84386697985157,
|
||||
3.30192724889463, 1.91293118277239, 2.41014226417523,
|
||||
3.10723250595386, 2.41014226417523, 2.6207413942089,
|
||||
2.71441761659491};
|
||||
double rad_temp_wind[] = {190, 118, 149, 313, 299, 99, 19, 256, 290, 274, 65,
|
||||
334, 307, 78, 322, 44, 8, 320, 25, 92, 13, 252, 223, 279, 127,
|
||||
291, 323, 148, 191, 284, 37, 120, 137, 269, 248, 236, 175,
|
||||
314, 276, 267, 272, 175, 264, 175, 48, 260, 274, 285, 187,
|
||||
220, 7, 294, 223, 81, 82, 213, 275, 253, 254, 83, 24, 77, 255,
|
||||
229, 207, 192, 273, 157, 71, 51, 115, 244, 190, 259, 36, 212,
|
||||
238, 215, 203, 225, 237, 188, 167, 197, 183, 189, 95, 92, 252,
|
||||
220, 230, 259, 236, 259, 238, 24, 112, 237, 224, 27, 238, 201,
|
||||
238, 14, 139, 49, 20, 193, 191, 131, 223,
|
||||
67, 72, 74, 62, 65, 59, 61, 69, 66, 68, 58, 64, 66, 57, 68,
|
||||
62, 59, 73, 61, 61, 67, 81, 79, 76, 82, 90, 87, 82, 77, 72,
|
||||
65, 73, 76, 84, 85, 81, 83, 83, 88, 92, 92, 89, 73, 81, 80,
|
||||
81, 82, 84, 87, 85, 74, 86, 85, 82, 86, 88, 86, 83, 81, 81,
|
||||
81, 82, 89, 90, 90, 86, 82, 80, 77, 79, 76, 78, 78, 77, 72,
|
||||
79, 81, 86, 97, 94, 96, 94, 91, 92, 93, 93, 87, 84, 80, 78,
|
||||
75, 73, 81, 76, 77, 71, 71, 78, 67, 76, 68, 82, 64, 71, 81,
|
||||
69, 63, 70, 75, 76, 68,
|
||||
7.4, 8, 12.6, 11.5, 8.6, 13.8, 20.1, 9.7, 9.2, 10.9, 13.2,
|
||||
11.5, 12, 18.4, 11.5, 9.7, 9.7, 16.6, 9.7, 12, 12, 14.9, 5.7,
|
||||
7.4, 9.7, 13.8, 11.5, 8, 14.9, 20.7, 9.2, 11.5, 10.3, 4, 9.2,
|
||||
9.2, 4.6, 10.9, 5.1, 6.3, 5.7, 7.4, 14.3, 14.9, 14.3, 6.9,
|
||||
10.3, 6.3, 5.1, 11.5, 6.9, 8.6, 8, 8.6, 12, 7.4, 7.4, 7.4,
|
||||
9.2, 6.9, 13.8, 7.4, 4, 10.3, 8, 11.5, 11.5, 9.7, 10.3, 6.3,
|
||||
7.4, 10.9, 10.3, 15.5, 14.3, 9.7, 3.4, 8, 9.7, 2.3, 6.3, 6.3,
|
||||
6.9, 5.1, 2.8, 4.6, 7.4, 15.5, 10.9, 10.3, 10.9, 9.7, 14.9,
|
||||
15.5, 6.3, 10.9, 11.5, 6.9, 13.8, 10.3, 10.3, 8, 12.6, 9.2,
|
||||
10.3, 10.3, 16.6, 6.9, 14.3, 8, 11.5};
|
||||
long n = 111, p = 3;
|
||||
|
||||
main() {
|
||||
printf("\nloess(&air):\n");
|
||||
loess_setup(rad_temp_wind, ozone, n, p, &air);
|
||||
air.model.span = 0.8;
|
||||
loess(&air);
|
||||
loess_summary(&air);
|
||||
|
||||
loess_free_mem(&air);
|
||||
}
|
||||
168
pmsco/loess/changes
Normal file
168
pmsco/loess/changes
Normal file
@@ -0,0 +1,168 @@
|
||||
CHANGES PLANNED SOMEDAY
|
||||
1) more vertices in k-d tree for dimension > 2, to get continuity.
|
||||
2) triangulation based method.
|
||||
----------------------
|
||||
|
||||
19 Nov 1987 workspace not big enough for degree=2
|
||||
|
||||
22 Jan 1988 switched from depth first to breadth first tree build
|
||||
|
||||
14 Mar 1988 lostt.3 extra space needed if (method mod 1000 = 0),
|
||||
not the documented (method/1000=0)
|
||||
|
||||
28 Apr 1988 l2tr.g vval2 needed to be initialized to 0
|
||||
|
||||
galaxy smooth needs double precision on vax
|
||||
|
||||
26 May 1988 bbox.g add 10% margin to allow limited extrapolation
|
||||
|
||||
6 June 1988 loess/lostt.f trL wasn't set if method/1000==0
|
||||
|
||||
10 June 1988 losave, loread
|
||||
|
||||
v(RCOND) 1 / max condition number
|
||||
|
||||
12 June 1988 lofort
|
||||
|
||||
21 June 1988 additional workspace for explicit L
|
||||
|
||||
27 June 1988 workspace checking in lowesf was slightly pessimistic
|
||||
|
||||
30 June 1988 Changed default fdiam to 0.
|
||||
Added warning messages for memory limits and pseudoinverse.
|
||||
|
||||
4 Aug 1988 bbox.g changed margin from 10% to 0.5%.
|
||||
|
||||
24 Aug 1988 loser documentation should have specified workspace
|
||||
of size ...+m*n, not ...+m**2.
|
||||
|
||||
Sep 1988
|
||||
loess-based approximations of delta1,2.
|
||||
pseudo-values, so statistics are available with robustness iterations.
|
||||
reorganize error messages to better fit into S.
|
||||
sample driver program.
|
||||
somewhat shorter code generated by ehg170.
|
||||
|
||||
20 Dec 1988
|
||||
workspace in loser
|
||||
|
||||
27 Jan 1989
|
||||
workspace checking in lostt was a bit pessimistic.
|
||||
|
||||
3 Feb 1989
|
||||
l2fit, l2tr: error message should contain sqrt(rho)
|
||||
|
||||
18 Dec 1989
|
||||
ehg141, ehg179-ehg181: new delta approximations
|
||||
|
||||
24 Jan 1990
|
||||
master copy moved from Sun3/180 to SGI 4D/240S
|
||||
(no intentional changes)
|
||||
|
||||
25 Jan 1990
|
||||
(many routines touched; ehg127 added) cleaned up computational
|
||||
kernel, added provision for only first dd<=d variables to enter
|
||||
the distance calculation ("conditionally parametric variables"),
|
||||
added independent bounds on total and componentwise degree for
|
||||
local polynomial model, made extrapolation warning message print
|
||||
a bit more detail.
|
||||
|
||||
14 Mar 1990
|
||||
added setLf argument to lowesd; added lowesr, lowesl for resmoothing.
|
||||
|
||||
-------------------------------------------------------
|
||||
Converting to the new version of loess
|
||||
5 April 1990
|
||||
|
||||
Over the past few months, a number of changes have been made to the
|
||||
loess package, to provide more control over the local model, to allow
|
||||
conditionally parametric variables, and to return exact statistical
|
||||
quantities for the blending method. Unlike earlier internal
|
||||
algorithmic improvements, this round of changes added some extra
|
||||
arguments in the Fortran calling sequences. The purpose of this note
|
||||
is to assist in converting programs that called the old version.
|
||||
|
||||
An explicit argument setLf has been added to lowesd(), since it affects
|
||||
the partitioning of the workspace. To help protect against inadvertent
|
||||
version mismatches, the version number that lowesd() checks has also
|
||||
been changed. The componentwise degree and the specification of
|
||||
conditionally nonparametric variables can be changed from the default
|
||||
by modifying iv(CDEG) and iv(NDIST).
|
||||
|
||||
The influence matrix L for blending is now explicitly available by
|
||||
calling a new subroutine lowesl(), but this loses the speed
|
||||
advantage of blending. A faster, sometimes equivalent method is
|
||||
to use the influence matrix that carries data values to coefficients
|
||||
at the vertices of the k-d tree. This information is saved in iv(iv(Lq))
|
||||
and v(iv(Lf)), for the afficionado.
|
||||
|
||||
The new subroutine lowesr() takes advantage of Lq and Lf to allow rapid
|
||||
resmoothing for applications when only y, not x, is subject to change.
|
||||
-------------------------------------------------------
|
||||
|
||||
7 May 1990
|
||||
new delta approximations.
|
||||
added prior weights to input format for sample driver.
|
||||
|
||||
29 May 1990
|
||||
loess,lostt,loser,pseudo moved from Fortran to S.
|
||||
|
||||
11 Jul 1990
|
||||
column equilibration, so pseudoinverse is needed less often.
|
||||
|
||||
27 May 1991
|
||||
lowesd version 105; increased nvmax,ncmax to max(200,n).
|
||||
l2fit added ihat=1 (diagL only).
|
||||
ehg133,lowese removed unused arguments dist,eta.
|
||||
ehg190,ehg141 changed name to lowesa, slight change to calling sequence.
|
||||
ehg144 changed name to lowesc
|
||||
m9rwt changed name to lowesw
|
||||
pseudo changed name to lowesp
|
||||
|
||||
22 Jul 1991 IMPORTANT BUG FIX!
|
||||
ehg131 vval2 should be dimensioned 0:d, not 0:8
|
||||
|
||||
26 Jul 1991
|
||||
lowesd change calling sequence to provide tighter memory allocation
|
||||
diff old/man/internal new/man/internal
|
||||
< lowesd(105,iv,liv,lv,v,d,n,f,tdeg,setLf) setup workspace
|
||||
> lowesd(106,iv,liv,lv,v,d,n,f,tdeg,nvmax,setLf) setup workspace
|
||||
< liv 50+(2^d+6)*max(200,n)
|
||||
< if setLf, add nf*max(200,n)
|
||||
< lv 50+(3*d+4)*max(200,n)+(tau+2)*nf
|
||||
< if setLf, add (d+1)*nf*max(200,n)
|
||||
> liv 50+(2^d+6)*nvmax
|
||||
> if setLf, add nf*nvmax
|
||||
> lv 50+(3*d+4)*nvmax+(tau+2)*nf
|
||||
> if setLf, add (d+1)*nf*nvmax
|
||||
> nvmax limit on number of vertices for kd-tree; e.g. max(200,n)
|
||||
|
||||
20 Sep 1991
|
||||
sample.f brought in sync with recent loess changes.
|
||||
|
||||
24 Dec 1991
|
||||
l2fit.f fixed comment in single precision version
|
||||
|
||||
10 Jan 1992
|
||||
ehg197.f new formula for approximating trL, valid for small f
|
||||
|
||||
15 May 1992
|
||||
netlib/a/dloess now includes C drivers (written by Ming-Jen Shyu,
|
||||
adapted from code used inside the S system)
|
||||
|
||||
22 Jun 1992
|
||||
ehg191.f Loop 11 ran too far, picking up one more value than necessary.
|
||||
The value was not used, so the loess computation itself is unaffected,
|
||||
but on some systems the old code could conceivably cause a reference
|
||||
to an invalid memory address and abort with a segmentation fault
|
||||
message.
|
||||
|
||||
23 Jun 1992
|
||||
S.h #include <math.h>, since loessc.c calls floor() and pow().
|
||||
|
||||
18 Aug 1992
|
||||
netlib/a/dloess A new release with bug fixes in all the C drivers, new
|
||||
example codes, and detail documentations.
|
||||
|
||||
25 Mar 1996
|
||||
predict.c fix enormous memory leak. update email address
|
||||
33320
pmsco/loess/cloess.ps
Normal file
33320
pmsco/loess/cloess.ps
Normal file
File diff suppressed because it is too large
Load Diff
117
pmsco/loess/depend.ps
Normal file
117
pmsco/loess/depend.ps
Normal file
@@ -0,0 +1,117 @@
|
||||
%!
|
||||
/Courier-Bold findfont 10 scalefont setfont
|
||||
%draw a box
|
||||
%x y width height box
|
||||
/box { newpath
|
||||
/height exch def
|
||||
/width exch def
|
||||
/y exch def
|
||||
/x exch def
|
||||
x width 2 div sub
|
||||
y height 2 div sub moveto
|
||||
width 0 rlineto
|
||||
0 height rlineto
|
||||
width neg 0 rlineto
|
||||
closepath } def
|
||||
|
||||
%draw a circle
|
||||
%x y radius circle
|
||||
/circle { newpath 0 360 arc } def
|
||||
|
||||
%draw an ellipse
|
||||
%x y width height ellipse
|
||||
/ellipse { gsave
|
||||
/height exch def
|
||||
/width exch def
|
||||
1 height width div scale
|
||||
width height div mul
|
||||
width 2 div
|
||||
circle stroke
|
||||
grestore } def
|
||||
|
||||
%draw a centered label
|
||||
%x y str
|
||||
/label {
|
||||
/str exch def
|
||||
/y exch def
|
||||
/x exch def
|
||||
str stringwidth
|
||||
pop /width exch def
|
||||
x width 2 div sub
|
||||
y 10 3 div sub moveto str show
|
||||
} def
|
||||
|
||||
%draw a line
|
||||
%x1 y1 x2 y2 drawline
|
||||
/drawline { 4 -2 roll moveto lineto stroke } def
|
||||
|
||||
277 684 42 14 box stroke
|
||||
277 684 (lowesd) label
|
||||
349 630 42 14 box stroke
|
||||
349 630 (lowesf) label
|
||||
205 630 42 14 box stroke
|
||||
205 630 (lowesb) label
|
||||
155 565 42 14 box stroke
|
||||
155 565 (lowesr) label
|
||||
146 427 42 14 box stroke
|
||||
146 427 (lowese) label
|
||||
277 576 42 14 box stroke
|
||||
277 576 (lowesl) label
|
||||
203 464 42 14 box stroke
|
||||
203 464 (lofort) label
|
||||
81 576 42 14 box stroke
|
||||
81 576 (losave) label
|
||||
81 522 42 14 box stroke
|
||||
81 522 (lohead) label
|
||||
81 468 42 14 box stroke
|
||||
81 468 (loread) label
|
||||
405 540 42 14 box stroke
|
||||
405 540 (lowesa) label
|
||||
342 539 42 14 box stroke
|
||||
342 539 (lowesc) label
|
||||
92 461 134 434 drawline
|
||||
124.266363 435.502104 134.000000 434.000000 drawline
|
||||
134.000000 434.000000 128.592424 442.231532 drawline
|
||||
81 515 81 475 drawline
|
||||
77.000000 484.000000 81.000000 475.000000 drawline
|
||||
81.000000 475.000000 85.000000 484.000000 drawline
|
||||
81 569 81 529 drawline
|
||||
77.000000 538.000000 81.000000 529.000000 drawline
|
||||
81.000000 529.000000 85.000000 538.000000 drawline
|
||||
289 569 329 546 drawline
|
||||
319.203959 547.018615 329.000000 546.000000 drawline
|
||||
329.000000 546.000000 323.191728 553.953865 drawline
|
||||
154 558 146 434 drawline
|
||||
142.587739 443.238857 146.000000 434.000000 drawline
|
||||
146.000000 434.000000 150.571142 442.723799 drawline
|
||||
188 623 97 583 drawline
|
||||
103.629564 590.283466 97.000000 583.000000 drawline
|
||||
97.000000 583.000000 106.848776 582.959760 drawline
|
||||
204 623 203 471 drawline
|
||||
199.059296 480.026120 203.000000 471.000000 drawline
|
||||
203.000000 471.000000 207.059123 479.973490 drawline
|
||||
214 623 267 583 drawline
|
||||
257.406670 585.228906 267.000000 583.000000 drawline
|
||||
267.000000 583.000000 262.225925 591.614419 drawline
|
||||
199 623 160 572 drawline
|
||||
162.289620 581.579021 160.000000 572.000000 drawline
|
||||
160.000000 572.000000 168.644482 576.719420 drawline
|
||||
220 623 389 547 drawline
|
||||
379.151237 547.043173 389.000000 547.000000 drawline
|
||||
389.000000 547.000000 382.432359 554.339352 drawline
|
||||
202 623 148 434 drawline
|
||||
146.626394 443.752600 148.000000 434.000000 drawline
|
||||
148.000000 434.000000 154.318586 441.554831 drawline
|
||||
348 623 342 546 drawline
|
||||
338.711268 555.283547 342.000000 546.000000 drawline
|
||||
342.000000 546.000000 346.687091 554.662054 drawline
|
||||
353 623 400 547 drawline
|
||||
391.864262 552.550655 400.000000 547.000000 drawline
|
||||
400.000000 547.000000 398.668290 556.758409 drawline
|
||||
267 677 214 637 drawline
|
||||
218.774075 645.614419 214.000000 637.000000 drawline
|
||||
214.000000 637.000000 223.593330 639.228906 drawline
|
||||
286 677 339 637 drawline
|
||||
329.406670 639.228906 339.000000 637.000000 drawline
|
||||
339.000000 637.000000 334.225925 645.614419 drawline
|
||||
showpage
|
||||
274
pmsco/loess/dqrsl.f
Normal file
274
pmsco/loess/dqrsl.f
Normal file
@@ -0,0 +1,274 @@
|
||||
subroutine dqrsl(x,ldx,n,k,qraux,y,qy,qty,b,rsd,xb,job,info)
|
||||
integer ldx,n,k,job,info
|
||||
double precision x(ldx,1),qraux(1),y(1),qy(1),qty(1),b(1),rsd(1),
|
||||
* xb(1)
|
||||
c
|
||||
c dqrsl applies the output of dqrdc to compute coordinate
|
||||
c transformations, projections, and least squares solutions.
|
||||
c for k .le. min(n,p), let xk be the matrix
|
||||
c
|
||||
c xk = (x(jpvt(1)),x(jpvt(2)), ... ,x(jpvt(k)))
|
||||
c
|
||||
c formed from columnns jpvt(1), ... ,jpvt(k) of the original
|
||||
c n x p matrix x that was input to dqrdc (if no pivoting was
|
||||
c done, xk consists of the first k columns of x in their
|
||||
c original order). dqrdc produces a factored orthogonal matrix q
|
||||
c and an upper triangular matrix r such that
|
||||
c
|
||||
c xk = q * (r)
|
||||
c (0)
|
||||
c
|
||||
c this information is contained in coded form in the arrays
|
||||
c x and qraux.
|
||||
c
|
||||
c on entry
|
||||
c
|
||||
c x double precision(ldx,p).
|
||||
c x contains the output of dqrdc.
|
||||
c
|
||||
c ldx integer.
|
||||
c ldx is the leading dimension of the array x.
|
||||
c
|
||||
c n integer.
|
||||
c n is the number of rows of the matrix xk. it must
|
||||
c have the same value as n in dqrdc.
|
||||
c
|
||||
c k integer.
|
||||
c k is the number of columns of the matrix xk. k
|
||||
c must nnot be greater than min(n,p), where p is the
|
||||
c same as in the calling sequence to dqrdc.
|
||||
c
|
||||
c qraux double precision(p).
|
||||
c qraux contains the auxiliary output from dqrdc.
|
||||
c
|
||||
c y double precision(n)
|
||||
c y contains an n-vector that is to be manipulated
|
||||
c by dqrsl.
|
||||
c
|
||||
c job integer.
|
||||
c job specifies what is to be computed. job has
|
||||
c the decimal expansion abcde, with the following
|
||||
c meaning.
|
||||
c
|
||||
c if a.ne.0, compute qy.
|
||||
c if b,c,d, or e .ne. 0, compute qty.
|
||||
c if c.ne.0, compute b.
|
||||
c if d.ne.0, compute rsd.
|
||||
c if e.ne.0, compute xb.
|
||||
c
|
||||
c note that a request to compute b, rsd, or xb
|
||||
c automatically triggers the computation of qty, for
|
||||
c which an array must be provided in the calling
|
||||
c sequence.
|
||||
c
|
||||
c on return
|
||||
c
|
||||
c qy double precision(n).
|
||||
c qy conntains q*y, if its computation has been
|
||||
c requested.
|
||||
c
|
||||
c qty double precision(n).
|
||||
c qty contains trans(q)*y, if its computation has
|
||||
c been requested. here trans(q) is the
|
||||
c transpose of the matrix q.
|
||||
c
|
||||
c b double precision(k)
|
||||
c b contains the solution of the least squares problem
|
||||
c
|
||||
c minimize norm2(y - xk*b),
|
||||
c
|
||||
c if its computation has been requested. (note that
|
||||
c if pivoting was requested in dqrdc, the j-th
|
||||
c component of b will be associated with column jpvt(j)
|
||||
c of the original matrix x that was input into dqrdc.)
|
||||
c
|
||||
c rsd double precision(n).
|
||||
c rsd contains the least squares residual y - xk*b,
|
||||
c if its computation has been requested. rsd is
|
||||
c also the orthogonal projection of y onto the
|
||||
c orthogonal complement of the column space of xk.
|
||||
c
|
||||
c xb double precision(n).
|
||||
c xb contains the least squares approximation xk*b,
|
||||
c if its computation has been requested. xb is also
|
||||
c the orthogonal projection of y onto the column space
|
||||
c of x.
|
||||
c
|
||||
c info integer.
|
||||
c info is zero unless the computation of b has
|
||||
c been requested and r is exactly singular. in
|
||||
c this case, info is the index of the first zero
|
||||
c diagonal element of r and b is left unaltered.
|
||||
c
|
||||
c the parameters qy, qty, b, rsd, and xb are not referenced
|
||||
c if their computation is not requested and in this case
|
||||
c can be replaced by dummy variables in the calling program.
|
||||
c to save storage, the user may in some cases use the same
|
||||
c array for different parameters in the calling sequence. a
|
||||
c frequently occuring example is when one wishes to compute
|
||||
c any of b, rsd, or xb and does not need y or qty. in this
|
||||
c case one may identify y, qty, and one of b, rsd, or xb, while
|
||||
c providing separate arrays for anything else that is to be
|
||||
c computed. thus the calling sequence
|
||||
c
|
||||
c call dqrsl(x,ldx,n,k,qraux,y,dum,y,b,y,dum,110,info)
|
||||
c
|
||||
c will result in the computation of b and rsd, with rsd
|
||||
c overwriting y. more generally, each item in the following
|
||||
c list contains groups of permissible identifications for
|
||||
c a single callinng sequence.
|
||||
c
|
||||
c 1. (y,qty,b) (rsd) (xb) (qy)
|
||||
c
|
||||
c 2. (y,qty,rsd) (b) (xb) (qy)
|
||||
c
|
||||
c 3. (y,qty,xb) (b) (rsd) (qy)
|
||||
c
|
||||
c 4. (y,qy) (qty,b) (rsd) (xb)
|
||||
c
|
||||
c 5. (y,qy) (qty,rsd) (b) (xb)
|
||||
c
|
||||
c 6. (y,qy) (qty,xb) (b) (rsd)
|
||||
c
|
||||
c in any group the value returned in the array allocated to
|
||||
c the group corresponds to the last member of the group.
|
||||
c
|
||||
c linpack. this version dated 08/14/78 .
|
||||
c g.w. stewart, university of maryland, argonne national lab.
|
||||
c
|
||||
c dqrsl uses the following functions and subprograms.
|
||||
c
|
||||
c blas daxpy,dcopy,ddot
|
||||
c fortran dabs,min0,mod
|
||||
c
|
||||
c internal variables
|
||||
c
|
||||
integer i,j,jj,ju,kp1
|
||||
double precision ddot,t,temp
|
||||
logical cb,cqy,cqty,cr,cxb
|
||||
c
|
||||
c
|
||||
c set info flag.
|
||||
c
|
||||
info = 0
|
||||
c
|
||||
c determine what is to be computed.
|
||||
c
|
||||
cqy = job/10000 .ne. 0
|
||||
cqty = mod(job,10000) .ne. 0
|
||||
cb = mod(job,1000)/100 .ne. 0
|
||||
cr = mod(job,100)/10 .ne. 0
|
||||
cxb = mod(job,10) .ne. 0
|
||||
ju = min0(k,n-1)
|
||||
c
|
||||
c special action when n=1.
|
||||
c
|
||||
if (ju .ne. 0) go to 40
|
||||
if (cqy) qy(1) = y(1)
|
||||
if (cqty) qty(1) = y(1)
|
||||
if (cxb) xb(1) = y(1)
|
||||
if (.not.cb) go to 30
|
||||
if (x(1,1) .ne. 0.0d0) go to 10
|
||||
info = 1
|
||||
go to 20
|
||||
10 continue
|
||||
b(1) = y(1)/x(1,1)
|
||||
20 continue
|
||||
30 continue
|
||||
if (cr) rsd(1) = 0.0d0
|
||||
go to 250
|
||||
40 continue
|
||||
c
|
||||
c set up to compute qy or qty.
|
||||
c
|
||||
if (cqy) call dcopy(n,y,1,qy,1)
|
||||
if (cqty) call dcopy(n,y,1,qty,1)
|
||||
if (.not.cqy) go to 70
|
||||
c
|
||||
c compute qy.
|
||||
c
|
||||
do 60 jj = 1, ju
|
||||
j = ju - jj + 1
|
||||
if (qraux(j) .eq. 0.0d0) go to 50
|
||||
temp = x(j,j)
|
||||
x(j,j) = qraux(j)
|
||||
t = -ddot(n-j+1,x(j,j),1,qy(j),1)/x(j,j)
|
||||
call daxpy(n-j+1,t,x(j,j),1,qy(j),1)
|
||||
x(j,j) = temp
|
||||
50 continue
|
||||
60 continue
|
||||
70 continue
|
||||
if (.not.cqty) go to 100
|
||||
c
|
||||
c compute trans(q)*y.
|
||||
c
|
||||
do 90 j = 1, ju
|
||||
if (qraux(j) .eq. 0.0d0) go to 80
|
||||
temp = x(j,j)
|
||||
x(j,j) = qraux(j)
|
||||
t = -ddot(n-j+1,x(j,j),1,qty(j),1)/x(j,j)
|
||||
call daxpy(n-j+1,t,x(j,j),1,qty(j),1)
|
||||
x(j,j) = temp
|
||||
80 continue
|
||||
90 continue
|
||||
100 continue
|
||||
c
|
||||
c set up to compute b, rsd, or xb.
|
||||
c
|
||||
if (cb) call dcopy(k,qty,1,b,1)
|
||||
kp1 = k + 1
|
||||
if (cxb) call dcopy(k,qty,1,xb,1)
|
||||
if (cr .and. k .lt. n) call dcopy(n-k,qty(kp1),1,rsd(kp1),1)
|
||||
if (.not.cxb .or. kp1 .gt. n) go to 120
|
||||
do 110 i = kp1, n
|
||||
xb(i) = 0.0d0
|
||||
110 continue
|
||||
120 continue
|
||||
if (.not.cr) go to 140
|
||||
do 130 i = 1, k
|
||||
rsd(i) = 0.0d0
|
||||
130 continue
|
||||
140 continue
|
||||
if (.not.cb) go to 190
|
||||
c
|
||||
c compute b.
|
||||
c
|
||||
do 170 jj = 1, k
|
||||
j = k - jj + 1
|
||||
if (x(j,j) .ne. 0.0d0) go to 150
|
||||
info = j
|
||||
c ......exit
|
||||
go to 180
|
||||
150 continue
|
||||
b(j) = b(j)/x(j,j)
|
||||
if (j .eq. 1) go to 160
|
||||
t = -b(j)
|
||||
call daxpy(j-1,t,x(1,j),1,b,1)
|
||||
160 continue
|
||||
170 continue
|
||||
180 continue
|
||||
190 continue
|
||||
if (.not.cr .and. .not.cxb) go to 240
|
||||
c
|
||||
c compute rsd or xb as required.
|
||||
c
|
||||
do 230 jj = 1, ju
|
||||
j = ju - jj + 1
|
||||
if (qraux(j) .eq. 0.0d0) go to 220
|
||||
temp = x(j,j)
|
||||
x(j,j) = qraux(j)
|
||||
if (.not.cr) go to 200
|
||||
t = -ddot(n-j+1,x(j,j),1,rsd(j),1)/x(j,j)
|
||||
call daxpy(n-j+1,t,x(j,j),1,rsd(j),1)
|
||||
200 continue
|
||||
if (.not.cxb) go to 210
|
||||
t = -ddot(n-j+1,x(j,j),1,xb(j),1)/x(j,j)
|
||||
call daxpy(n-j+1,t,x(j,j),1,xb(j),1)
|
||||
210 continue
|
||||
x(j,j) = temp
|
||||
220 continue
|
||||
230 continue
|
||||
240 continue
|
||||
250 continue
|
||||
return
|
||||
end
|
||||
481
pmsco/loess/dsvdc.f
Normal file
481
pmsco/loess/dsvdc.f
Normal file
@@ -0,0 +1,481 @@
|
||||
subroutine dsvdc(x,ldx,n,p,s,e,u,ldu,v,ldv,work,job,info)
|
||||
integer ldx,n,p,ldu,ldv,job,info
|
||||
double precision x(ldx,1),s(1),e(1),u(ldu,1),v(ldv,1),work(1)
|
||||
c
|
||||
c
|
||||
c dsvdc is a subroutine to reduce a double precision nxp matrix x
|
||||
c by orthogonal transformations u and v to diagonal form. the
|
||||
c diagonal elements s(i) are the singular values of x. the
|
||||
c columns of u are the corresponding left singular vectors,
|
||||
c and the columns of v the right singular vectors.
|
||||
c
|
||||
c on entry
|
||||
c
|
||||
c x double precision(ldx,p), where ldx.ge.n.
|
||||
c x contains the matrix whose singular value
|
||||
c decomposition is to be computed. x is
|
||||
c destroyed by dsvdc.
|
||||
c
|
||||
c ldx integer.
|
||||
c ldx is the leading dimension of the array x.
|
||||
c
|
||||
c n integer.
|
||||
c n is the number of rows of the matrix x.
|
||||
c
|
||||
c p integer.
|
||||
c p is the number of columns of the matrix x.
|
||||
c
|
||||
c ldu integer.
|
||||
c ldu is the leading dimension of the array u.
|
||||
c (see below).
|
||||
c
|
||||
c ldv integer.
|
||||
c ldv is the leading dimension of the array v.
|
||||
c (see below).
|
||||
c
|
||||
c work double precision(n).
|
||||
c work is a scratch array.
|
||||
c
|
||||
c job integer.
|
||||
c job controls the computation of the singular
|
||||
c vectors. it has the decimal expansion ab
|
||||
c with the following meaning
|
||||
c
|
||||
c a.eq.0 do not compute the left singular
|
||||
c vectors.
|
||||
c a.eq.1 return the n left singular vectors
|
||||
c in u.
|
||||
c a.ge.2 return the first min(n,p) singular
|
||||
c vectors in u.
|
||||
c b.eq.0 do not compute the right singular
|
||||
c vectors.
|
||||
c b.eq.1 return the right singular vectors
|
||||
c in v.
|
||||
c
|
||||
c on return
|
||||
c
|
||||
c s double precision(mm), where mm=min(n+1,p).
|
||||
c the first min(n,p) entries of s contain the
|
||||
c singular values of x arranged in descending
|
||||
c order of magnitude.
|
||||
c
|
||||
c e double precision(p),
|
||||
c e ordinarily contains zeros. however see the
|
||||
c discussion of info for exceptions.
|
||||
c
|
||||
c u double precision(ldu,k), where ldu.ge.n. if
|
||||
c joba.eq.1 then k.eq.n, if joba.ge.2
|
||||
c then k.eq.min(n,p).
|
||||
c u contains the matrix of left singular vectors.
|
||||
c u is not referenced if joba.eq.0. if n.le.p
|
||||
c or if joba.eq.2, then u may be identified with x
|
||||
c in the subroutine call.
|
||||
c
|
||||
c v double precision(ldv,p), where ldv.ge.p.
|
||||
c v contains the matrix of right singular vectors.
|
||||
c v is not referenced if job.eq.0. if p.le.n,
|
||||
c then v may be identified with x in the
|
||||
c subroutine call.
|
||||
c
|
||||
c info integer.
|
||||
c the singular values (and their corresponding
|
||||
c singular vectors) s(info+1),s(info+2),...,s(m)
|
||||
c are correct (here m=min(n,p)). thus if
|
||||
c info.eq.0, all the singular values and their
|
||||
c vectors are correct. in any event, the matrix
|
||||
c b = trans(u)*x*v is the bidiagonal matrix
|
||||
c with the elements of s on its diagonal and the
|
||||
c elements of e on its super-diagonal (trans(u)
|
||||
c is the transpose of u). thus the singular
|
||||
c values of x and b are the same.
|
||||
c
|
||||
c linpack. this version dated 08/14/78 .
|
||||
c correction made to shift 2/84.
|
||||
c g.w. stewart, university of maryland, argonne national lab.
|
||||
c
|
||||
c dsvdc uses the following functions and subprograms.
|
||||
c
|
||||
c external drot
|
||||
c blas daxpy,ddot,dscal,dswap,dnrm2,drotg
|
||||
c fortran dabs,dmax1,max0,min0,mod,dsqrt
|
||||
c
|
||||
c internal variables
|
||||
c
|
||||
      integer i,iter,j,jobu,k,kase,kk,l,ll,lls,lm1,lp1,ls,lu,m,maxit,
     *        mm,mm1,mp1,nct,nctp1,ncu,nrt,nrtp1
      double precision ddot,t,r
      double precision b,c,cs,el,emm1,f,g,dnrm2,scale,shift,sl,sm,sn,
     *                 smm1,t1,test,ztest
      logical wantu,wantv
c
c
c     set the maximum number of iterations.
c
      maxit = 30
c
c     determine what is to be computed.
c
      wantu = .false.
      wantv = .false.
      jobu = mod(job,100)/10
      ncu = n
      if (jobu .gt. 1) ncu = min0(n,p)
      if (jobu .ne. 0) wantu = .true.
      if (mod(job,10) .ne. 0) wantv = .true.
c
c     reduce x to bidiagonal form, storing the diagonal elements
c     in s and the super-diagonal elements in e.
c
      info = 0
      nct = min0(n-1,p)
      nrt = max0(0,min0(p-2,n))
      lu = max0(nct,nrt)
      if (lu .lt. 1) go to 170
      do 160 l = 1, lu
         lp1 = l + 1
         if (l .gt. nct) go to 20
c
c           compute the transformation for the l-th column and
c           place the l-th diagonal in s(l).
c
            s(l) = dnrm2(n-l+1,x(l,l),1)
            if (s(l) .eq. 0.0d0) go to 10
               if (x(l,l) .ne. 0.0d0) s(l) = dsign(s(l),x(l,l))
               call dscal(n-l+1,1.0d0/s(l),x(l,l),1)
               x(l,l) = 1.0d0 + x(l,l)
   10       continue
            s(l) = -s(l)
   20    continue
         if (p .lt. lp1) go to 50
         do 40 j = lp1, p
            if (l .gt. nct) go to 30
            if (s(l) .eq. 0.0d0) go to 30
c
c              apply the transformation.
c
               t = -ddot(n-l+1,x(l,l),1,x(l,j),1)/x(l,l)
               call daxpy(n-l+1,t,x(l,l),1,x(l,j),1)
   30       continue
c
c           place the l-th row of x into e for the
c           subsequent calculation of the row transformation.
c
            e(j) = x(l,j)
   40    continue
   50    continue
         if (.not.wantu .or. l .gt. nct) go to 70
c
c           place the transformation in u for subsequent back
c           multiplication.
c
            do 60 i = l, n
               u(i,l) = x(i,l)
   60       continue
   70    continue
         if (l .gt. nrt) go to 150
c
c           compute the l-th row transformation and place the
c           l-th super-diagonal in e(l).
c
            e(l) = dnrm2(p-l,e(lp1),1)
            if (e(l) .eq. 0.0d0) go to 80
               if (e(lp1) .ne. 0.0d0) e(l) = dsign(e(l),e(lp1))
               call dscal(p-l,1.0d0/e(l),e(lp1),1)
               e(lp1) = 1.0d0 + e(lp1)
   80       continue
            e(l) = -e(l)
            if (lp1 .gt. n .or. e(l) .eq. 0.0d0) go to 120
c
c              apply the transformation.
c
               do 90 i = lp1, n
                  work(i) = 0.0d0
   90          continue
               do 100 j = lp1, p
                  call daxpy(n-l,e(j),x(lp1,j),1,work(lp1),1)
  100          continue
               do 110 j = lp1, p
                  call daxpy(n-l,-e(j)/e(lp1),work(lp1),1,x(lp1,j),1)
  110          continue
  120       continue
            if (.not.wantv) go to 140
c
c              place the transformation in v for subsequent
c              back multiplication.
c
               do 130 i = lp1, p
                  v(i,l) = e(i)
  130          continue
  140       continue
  150    continue
  160 continue
  170 continue
c
c     set up the final bidiagonal matrix of order m.
c
      m = min0(p,n+1)
      nctp1 = nct + 1
      nrtp1 = nrt + 1
      if (nct .lt. p) s(nctp1) = x(nctp1,nctp1)
      if (n .lt. m) s(m) = 0.0d0
      if (nrtp1 .lt. m) e(nrtp1) = x(nrtp1,m)
      e(m) = 0.0d0
c
c     if required, generate u.
c
      if (.not.wantu) go to 300
         if (ncu .lt. nctp1) go to 200
         do 190 j = nctp1, ncu
            do 180 i = 1, n
               u(i,j) = 0.0d0
  180       continue
            u(j,j) = 1.0d0
  190    continue
  200    continue
         if (nct .lt. 1) go to 290
         do 280 ll = 1, nct
            l = nct - ll + 1
            if (s(l) .eq. 0.0d0) go to 250
               lp1 = l + 1
               if (ncu .lt. lp1) go to 220
               do 210 j = lp1, ncu
                  t = -ddot(n-l+1,u(l,l),1,u(l,j),1)/u(l,l)
                  call daxpy(n-l+1,t,u(l,l),1,u(l,j),1)
  210          continue
  220          continue
               call dscal(n-l+1,-1.0d0,u(l,l),1)
               u(l,l) = 1.0d0 + u(l,l)
               lm1 = l - 1
               if (lm1 .lt. 1) go to 240
               do 230 i = 1, lm1
                  u(i,l) = 0.0d0
  230          continue
  240          continue
               go to 270
  250       continue
               do 260 i = 1, n
                  u(i,l) = 0.0d0
  260          continue
               u(l,l) = 1.0d0
  270       continue
  280    continue
  290    continue
  300 continue
c
c     if it is required, generate v.
c
      if (.not.wantv) go to 350
         do 340 ll = 1, p
            l = p - ll + 1
            lp1 = l + 1
            if (l .gt. nrt) go to 320
            if (e(l) .eq. 0.0d0) go to 320
               do 310 j = lp1, p
                  t = -ddot(p-l,v(lp1,l),1,v(lp1,j),1)/v(lp1,l)
                  call daxpy(p-l,t,v(lp1,l),1,v(lp1,j),1)
  310          continue
  320       continue
            do 330 i = 1, p
               v(i,l) = 0.0d0
  330       continue
            v(l,l) = 1.0d0
  340    continue
  350 continue
c
c     main iteration loop for the singular values.
c
      mm = m
      iter = 0
  360 continue
c
c        quit if all the singular values have been found.
c
c     ...exit
         if (m .eq. 0) go to 620
c
c        if too many iterations have been performed, set
c        flag and return.
c
         if (iter .lt. maxit) go to 370
            info = m
c     ......exit
            go to 620
  370    continue
c
c        this section of the program inspects for
c        negligible elements in the s and e arrays. on
c        completion the variables kase and l are set as follows.
c
c           kase = 1     if s(m) and e(l-1) are negligible and l.lt.m
c           kase = 2     if s(l) is negligible and l.lt.m
c           kase = 3     if e(l-1) is negligible, l.lt.m, and
c                        s(l), ..., s(m) are not negligible (qr step).
c           kase = 4     if e(m-1) is negligible (convergence).
c
         do 390 ll = 1, m
            l = m - ll
c        ...exit
            if (l .eq. 0) go to 400
            test = dabs(s(l)) + dabs(s(l+1))
            ztest = test + dabs(e(l))
            if (ztest .ne. test) go to 380
               e(l) = 0.0d0
c        ......exit
               go to 400
  380       continue
  390    continue
  400    continue
         if (l .ne. m - 1) go to 410
            kase = 4
            go to 480
  410    continue
            lp1 = l + 1
            mp1 = m + 1
            do 430 lls = lp1, mp1
               ls = m - lls + lp1
c           ...exit
               if (ls .eq. l) go to 440
               test = 0.0d0
               if (ls .ne. m) test = test + dabs(e(ls))
               if (ls .ne. l + 1) test = test + dabs(e(ls-1))
               ztest = test + dabs(s(ls))
               if (ztest .ne. test) go to 420
                  s(ls) = 0.0d0
c           ......exit
                  go to 440
  420          continue
  430       continue
  440       continue
            if (ls .ne. l) go to 450
               kase = 3
               go to 470
  450       continue
            if (ls .ne. m) go to 460
               kase = 1
               go to 470
  460       continue
               kase = 2
               l = ls
  470       continue
  480    continue
         l = l + 1
c
c        perform the task indicated by kase.
c
         go to (490,520,540,570), kase
c
c        deflate negligible s(m).
c
  490    continue
            mm1 = m - 1
            f = e(m-1)
            e(m-1) = 0.0d0
            do 510 kk = l, mm1
               k = mm1 - kk + l
               t1 = s(k)
               call drotg(t1,f,cs,sn)
               s(k) = t1
               if (k .eq. l) go to 500
                  f = -sn*e(k-1)
                  e(k-1) = cs*e(k-1)
  500          continue
               if (wantv) call drot(p,v(1,k),1,v(1,m),1,cs,sn)
  510       continue
         go to 610
c
c        split at negligible s(l).
c
  520    continue
            f = e(l-1)
            e(l-1) = 0.0d0
            do 530 k = l, m
               t1 = s(k)
               call drotg(t1,f,cs,sn)
               s(k) = t1
               f = -sn*e(k)
               e(k) = cs*e(k)
               if (wantu) call drot(n,u(1,k),1,u(1,l-1),1,cs,sn)
  530       continue
         go to 610
c
c        perform one qr step.
c
  540    continue
c
c           calculate the shift.
c
            scale = dmax1(dabs(s(m)),dabs(s(m-1)),dabs(e(m-1)),
     *                    dabs(s(l)),dabs(e(l)))
            sm = s(m)/scale
            smm1 = s(m-1)/scale
            emm1 = e(m-1)/scale
            sl = s(l)/scale
            el = e(l)/scale
            b = ((smm1 + sm)*(smm1 - sm) + emm1**2)/2.0d0
            c = (sm*emm1)**2
            shift = 0.0d0
            if (b .eq. 0.0d0 .and. c .eq. 0.0d0) go to 550
               shift = dsqrt(b**2+c)
               if (b .lt. 0.0d0) shift = -shift
               shift = c/(b + shift)
  550       continue
            f = (sl + sm)*(sl - sm) + shift
            g = sl*el
c
c           chase zeros.
c
            mm1 = m - 1
            do 560 k = l, mm1
               call drotg(f,g,cs,sn)
               if (k .ne. l) e(k-1) = f
               f = cs*s(k) + sn*e(k)
               e(k) = cs*e(k) - sn*s(k)
               g = sn*s(k+1)
               s(k+1) = cs*s(k+1)
               if (wantv) call drot(p,v(1,k),1,v(1,k+1),1,cs,sn)
               call drotg(f,g,cs,sn)
               s(k) = f
               f = cs*e(k) + sn*s(k+1)
               s(k+1) = -sn*e(k) + cs*s(k+1)
               g = sn*e(k+1)
               e(k+1) = cs*e(k+1)
               if (wantu .and. k .lt. n)
     *            call drot(n,u(1,k),1,u(1,k+1),1,cs,sn)
  560       continue
            e(m-1) = f
            iter = iter + 1
         go to 610
c
c        convergence.
c
  570    continue
c
c           make the singular value positive.
c
            if (s(l) .ge. 0.0d0) go to 580
               s(l) = -s(l)
               if (wantv) call dscal(p,-1.0d0,v(1,l),1)
  580       continue
c
c           order the singular value.
c
  590       if (l .eq. mm) go to 600
c           ...exit
               if (s(l) .ge. s(l+1)) go to 600
               t = s(l)
               s(l) = s(l+1)
               s(l+1) = t
               if (wantv .and. l .lt. p)
     *            call dswap(p,v(1,l),1,v(1,l+1),1)
               if (wantu .and. l .lt. n)
     *            call dswap(n,u(1,l),1,u(1,l+1),1)
               l = l + 1
            go to 590
  600       continue
            iter = 0
            m = m - 1
  610    continue
      go to 360
  620 continue
      return
      end
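A note on the job argument decoded above: dsvdc reads the two decimal digits of job independently (jobu = mod(job,100)/10 selects the left-singular-vector option a, mod(job,10) the right-singular-vector option b). A minimal standalone C illustration of that decoding, not part of the distribution, with arbitrary example values:

/* decode dsvdc's job argument ab into its two decimal digits;
   e.g. job = 21 requests the first min(n,p) left singular vectors
   (a = 2) and the right singular vectors (b = 1) */
#include <stdio.h>

int main(void)
{
    int job = 21;
    int a = (job % 100) / 10;   /* left singular vector option  */
    int b = job % 10;           /* right singular vector option */
    printf("a = %d, b = %d\n", a, b);
    return 0;
}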
93
pmsco/loess/ethanol.c
Normal file
@@ -0,0 +1,93 @@
#include <stdio.h>
#include "loess.h"

struct loess_struct ethanol, ethanol_cp;
struct pred_struct ethanol_pred, ethanol_grid;
struct ci_struct ethanol_ci;
double NOx[] = {3.741, 2.295, 1.498, 2.881, 0.76, 3.12, 0.638, 1.17, 2.358,
                0.606, 3.669, 1, 0.981, 1.192, 0.926, 1.59, 1.806, 1.962,
                4.028, 3.148, 1.836, 2.845, 1.013, 0.414, 0.812, 0.374, 3.623,
                1.869, 2.836, 3.567, 0.866, 1.369, 0.542, 2.739, 1.2, 1.719,
                3.423, 1.634, 1.021, 2.157, 3.361, 1.39, 1.947, 0.962, 0.571,
                2.219, 1.419, 3.519, 1.732, 3.206, 2.471, 1.777, 2.571, 3.952,
                3.931, 1.587, 1.397, 3.536, 2.202, 0.756, 1.62, 3.656, 2.964,
                3.76, 0.672, 3.677, 3.517, 3.29, 1.139, 0.727, 2.581, 0.923,
                1.527, 3.388, 2.085, 0.966, 3.488, 0.754, 0.797, 2.064, 3.732,
                0.586, 0.561, 0.563, 0.678, 0.37, 0.53, 1.9};
double C_E[] = {12, 12, 12, 12, 12, 9, 9, 9, 12, 12, 12, 12, 15, 18, 7.5, 12,
                12, 15, 15, 9, 9, 7.5, 7.5, 18, 18, 15, 15, 7.5, 7.5, 9, 15, 15,
                15, 15, 15, 9, 9, 7.5, 7.5, 7.5, 18, 18, 18, 18, 9, 9, 9, 9,
                7.5, 7.5, 7.5, 15, 18, 18, 15, 15, 7.5, 7.5, 7.5, 7.5, 7.5, 7.5,
                7.5, 18, 18, 18, 12, 12, 9, 9, 9, 15, 15, 15, 15, 15, 7.5, 7.5,
                9, 7.5, 18, 18, 7.5, 9, 12, 15, 18, 18,
                0.907, 0.761, 1.108, 1.016, 1.189, 1.001, 1.231, 1.123, 1.042,
                1.215, 0.93, 1.152, 1.138, 0.601, 0.696, 0.686, 1.072, 1.074,
                0.934, 0.808, 1.071, 1.009, 1.142, 1.229, 1.175, 0.568, 0.977,
                0.767, 1.006, 0.893, 1.152, 0.693, 1.232, 1.036, 1.125, 1.081,
                0.868, 0.762, 1.144, 1.045, 0.797, 1.115, 1.07, 1.219, 0.637,
                0.733, 0.715, 0.872, 0.765, 0.878, 0.811, 0.676, 1.045, 0.968,
                0.846, 0.684, 0.729, 0.911, 0.808, 1.168, 0.749, 0.892, 1.002,
                0.812, 1.23, 0.804, 0.813, 1.002, 0.696, 1.199, 1.03, 0.602,
                0.694, 0.816, 1.037, 1.181, 0.899, 1.227, 1.18, 0.795, 0.99,
                1.201, 0.629, 0.608, 0.584, 0.562, 0.535, 0.655};
double newdata[] = {7.5, 9.0, 12.0, 15.0, 18.0, 0.6, 0.8, 1.0, 0.8, 0.6};
double Cmin = 7.5, Cmax = 18.0, Emin = 0.535, Emax = 1.232;
double Cm[7], Em[16], grid[224];
double tmp, coverage = .99;
int n = 88, p = 2, m = 5, se_fit = FALSE;
int i, j, k;

main() {
    printf("\nloess(&ethanol): (span = 0.5)\n");
    loess_setup(C_E, NOx, n, p, &ethanol);
    ethanol.model.span = 0.5;
    loess(&ethanol);
    loess_summary(&ethanol);

    printf("\nloess(&ethanol): (span = 0.25)\n");
    ethanol.model.span = 0.25;
    loess(&ethanol);
    loess_summary(&ethanol);

    printf("\nloess(&ethanol_cp): (span = 0.25)\n");
    loess_setup(C_E, NOx, n, p, &ethanol_cp);
    ethanol_cp.model.span = 0.25;
    ethanol_cp.model.parametric[0] = TRUE;
    ethanol_cp.model.drop_square[0] = TRUE;
    loess(&ethanol_cp);
    loess_summary(&ethanol_cp);

    printf("\nloess(&ethanol_cp): (span = 0.5)\n");
    ethanol_cp.model.span = 0.5;
    loess(&ethanol_cp);
    loess_summary(&ethanol_cp);

    printf("\npredict(newdata, m, &ethanol, &ethanol_pred, %d):\n", se_fit);
    predict(newdata, m, &ethanol_cp, &ethanol_pred, se_fit);
    for(i = 0; i < m; i++)
        printf("%g ", ethanol_pred.fit[i]);
    printf("\n");

    m = 112;
    se_fit = TRUE;
    tmp = (Cmax - Cmin) / 6;
    for(i = 0; i < 7; i++)
        Cm[i] = Cmin + tmp * i;
    tmp = (Emax - Emin) / 15;
    for(i = 0; i < 16; i++)
        Em[i] = Emin + tmp * i;
    for(i = 0; i < 16; i++) {
        k = i * 7;
        for(j = 0; j < 7; j++) {
            grid[k + j] = Cm[j];
            grid[m + k + j] = Em[i];
        }
    }
    predict(grid, m, &ethanol_cp, &ethanol_grid, se_fit);
    pointwise(&ethanol_grid, m, coverage, &ethanol_ci);

    loess_free_mem(&ethanol);
    loess_free_mem(&ethanol_cp);
    pred_free_mem(&ethanol_pred);
    pred_free_mem(&ethanol_grid);
}
7
pmsco/loess/fix_main.c
Normal file
@@ -0,0 +1,7 @@
// workaround for linker error
// "libf2c.so.0: undefined symbol: MAIN__ "
//
int MAIN__()
{ return(0);
}

204
pmsco/loess/galaxy.c
Normal file
@@ -0,0 +1,204 @@
#include <stdio.h>
#include "loess.h"

struct loess_struct galaxy;
struct pred_struct galaxy_contour, spine_fit, spine_se;
struct ci_struct spine_ci;
double velocity[] = {1769, 1749, 1749, 1758, 1750, 1745, 1750, 1753, 1734,
                     1710, 1711, 1709, 1674, 1665, 1680, 1648, 1626, 1581, 1602,
                     1558, 1538, 1506, 1521, 1498, 1501, 1491, 1481, 1468, 1455,
                     1454, 1456, 1459, 1451, 1465, 1451, 1486, 1433, 1631, 1618,
                     1607, 1608, 1601, 1603, 1612, 1607, 1618, 1649, 1595, 1580,
                     1574, 1574, 1559, 1578, 1591, 1579, 1588, 1581, 1569, 1572,
                     1584, 1565, 1718, 1711, 1710, 1715, 1713, 1717, 1715, 1712,
                     1710, 1692, 1669, 1679, 1691, 1647, 1630, 1616, 1576, 1561,
                     1558, 1538, 1525, 1509, 1501, 1494, 1489, 1493, 1487, 1495,
                     1511, 1505, 1508, 1507, 1513, 1493, 1495, 1736, 1744, 1765,
                     1766, 1764, 1715, 1751, 1761, 1763, 1758, 1743, 1738, 1732,
                     1734, 1723, 1706, 1665, 1677, 1679, 1601, 1629, 1621, 1574,
                     1559, 1540, 1525, 1517, 1506, 1481, 1465, 1468, 1465, 1454,
                     1448, 1441, 1441, 1430, 1434, 1445, 1464, 1471, 1442, 1436,
                     1434, 1428, 1558, 1563, 1581, 1548, 1572, 1574, 1578, 1576,
                     1583, 1584, 1566, 1568, 1577, 1587, 1606, 1593, 1584, 1595,
                     1617, 1552, 1597, 1615, 1626, 1626, 1586, 1624, 1600, 1585,
                     1738, 1690, 1729, 1719, 1702, 1754, 1741, 1736, 1731, 1725,
                     1710, 1673, 1669, 1641, 1675, 1681, 1645, 1594, 1583, 1599,
                     1578, 1548, 1543, 1537, 1543, 1519, 1500, 1488, 1486, 1483,
                     1481, 1485, 1480, 1479, 1505, 1482, 1481, 1489, 1531, 1533,
                     1539, 1526, 1551, 1549, 1532, 1538, 1550, 1536, 1519, 1536,
                     1535, 1536, 1533, 1528, 1539, 1546, 1552, 1557, 1573, 1553,
                     1576, 1591, 1591, 1624, 1633, 1597, 1605, 1629, 1658, 1664,
                     1667, 1671, 1687, 1682, 1668, 1673, 1684, 1668, 1618, 1658,
                     1644, 1647, 1642, 1616, 1629, 1610, 1603, 1613, 1603, 1606,
                     1603, 1608, 1613, 1616, 1615, 1611, 1580, 1580, 1586, 1591,
                     1592, 1562, 1572, 1589, 1588, 1585, 1586, 1573, 1573, 1558,
                     1566, 1740, 1704, 1748, 1757, 1775, 1765, 1762, 1752, 1752,
                     1753, 1753, 1748, 1730, 1709, 1688, 1687, 1678, 1654, 1634,
                     1611, 1590, 1562, 1565, 1541, 1537, 1515, 1498, 1479, 1481,
                     1475, 1466, 1461, 1457, 1455, 1452, 1453, 1448, 1469, 1456,
                     1448, 1409, 1416, 1429};
double direction[] = {8.46279, 7.96498, 7.46717, 6.96936, 6.47154, 5.97373,
                      5.47592, 4.97811, 4.4803, 3.98249, 3.46303, 2.96522,
                      2.46741, 1.9696, 1.47179, 0.973978, 0.476167, -0.021644,
                      -0.519455, -1.01727, -1.51508, -2.01289, -2.5107,
                      -3.00851, -3.52797, -4.02578, -4.52359, -5.0214,
                      -5.51921, -6.01702, -6.51483, -7.01264, -7.51045,
                      -8.00827, -8.50608, -9.5017, -11.0168, 27.8244, 21.088,
                      18.8425, 16.597, 14.3516, 12.1061, 9.86059, 7.61511,
                      5.272, 3.02652, 0.781037, -1.46444, -3.70992, -5.95541,
                      -8.20089, -10.4464, -12.6918, -14.9373, -17.1828,
                      -19.4283, -21.6738, -23.9193, -26.2624, -28.5078,
                      23.8699, 22.3013, 20.7327, 19.1642, 17.5956, 16.027,
                      14.3902, 12.8216, 11.253, 9.68438, 8.11578, 6.54718,
                      4.97859, 3.40999, 1.8414, 0.272799, -1.2958, -2.86439,
                      -4.43299, -6.00159, -7.63838, -9.20698, -10.7756,
                      -12.3442, -13.9128, -15.4814, -17.05, -18.6186,
                      -20.1872, -21.7557, -23.3243, -24.8929, -26.4615,
                      -28.0301, -29.6669, 18.4201, 17.5959, 16.7716, 15.9474,
                      14.263, 13.4388, 12.6146, 11.7903, 10.9661, 10.1418,
                      9.31757, 8.49332, 7.66907, 6.84483, 6.02058, 5.19634,
                      4.37209, 3.54784, 2.68776, 1.86351, 1.03927, 0.215021,
                      -0.609226, -1.43347, -2.25772, -3.08196, -3.90621,
                      -4.73046, -5.5547, -6.37895, -7.2032, -8.02744,
                      -8.88752, -9.71177, -10.536, -11.3603, -12.1845,
                      -13.0088, -13.833, -14.6572, -15.4815, -16.3057,
                      -17.13, -17.9542, -18.7785, 25.8899, 24.2078, 22.4526,
                      20.8436, 19.1615, 17.4794, 15.7972, 14.1151, 12.433,
                      10.7509, 9.06879, 7.31354, 5.70456, 3.94931, 2.19406,
                      0.511948, -1.09703, -2.77914, -4.46126, -6.07024,
                      -7.82548, -9.5076, -11.1897, -12.8718, -14.5539,
                      -16.2361, -23.1108, -24.7198, 1.97596, 1.77531, 1.67498,
                      1.57466, 1.47434, 1.37401, 1.27369, 1.17336, 1.07304,
                      0.972712, 0.872388, 0.767701, 0.667377, 0.567052,
                      0.466727, 0.366403, 0.266078, 0.165754, 0.0654291,
                      -0.0348955, -0.13522, -0.235545, -0.335869, -0.436194,
                      -0.536518, -0.636843, -0.74153, -0.841854, -0.942179,
                      -1.0425, -1.14283, -1.24315, -1.34348, -1.4438,
                      -1.54413, -1.64445, -1.74478, -1.8451, 24.8532, 23.827,
                      22.8007, 21.7298, 20.7036, 19.6773, 18.6511, 16.5539,
                      15.5723, 14.546, 13.4752, 12.4489, 11.4227, 10.3964,
                      9.37015, 8.3439, 7.31764, 6.29139, 5.26513, 4.23888,
                      3.21262, 2.18637, 1.16011, 0.133859, -0.937015,
                      -1.96327, -2.98953, -4.01578, -5.04204, -6.06829,
                      -7.04993, -8.07618, -9.14706, -10.1733, -11.1996,
                      -12.2258, -13.2521, -14.2783, -15.3046, -16.3308,
                      -17.3571, -18.3834, -19.4096, -20.4359, -21.4621,
                      -22.4884, 29.4841, 27.0434, 25.0908, 22.6501, 20.4046,
                      18.1591, 15.9136, 13.7658, 11.4227, 9.17718, 6.9317,
                      4.58859, 2.44074, 0.0976296, -2.05022, -4.19807,
                      -6.63881, -8.88429, -11.1298, -13.2776, -15.5231,
                      -17.8662, -20.1117, -22.3572, -24.6027, -26.8481,
                      -29.0936, 10.8869, 9.39348, 8.91731, 8.39786, 7.92169,
                      7.42388, 6.92607, 6.42826, 5.9088, 5.41099, 4.91318,
                      4.41537, 3.91756, 3.44139, 2.92193, 2.42412, 1.92631,
                      1.4285, 0.93069, 0.432879, -0.0649319, -0.562743,
                      -1.06055, -1.55837, -2.07782, -2.55399, -3.07344,
                      -3.57125, -4.06906, -4.56688, -5.06469, -5.5625,
                      -6.06031, -6.55812, -7.05593, -7.57539, -8.0732,
                      -8.54937, -9.09046, -9.58827, -10.0428, -10.5406,
                      -11.0601,
                      -38.1732, -35.9277, -33.6822, -31.4367, -29.1912,
                      -26.9458, -24.7003, -22.4548, -20.2093, -17.9638, -15.6207,
                      -13.3753, -11.1298, -8.88429, -6.63881, -4.39333, -2.14785,
                      0.0976296, 2.34311, 4.58859, 6.83407, 9.07955, 11.325,
                      13.5705, 15.9136, 18.1591, 20.4046, 22.6501, 24.8955,
                      27.141, 29.3865, 31.632, 33.8775, 36.123, 38.3684, 42.8594,
                      49.6935, 6.16853, 4.6751, 4.17728, 3.67947, 3.18166, 2.68385,
                      2.18604, 1.68823, 1.16877, 0.670963, 0.173152, -0.324659,
                      -0.822471, -1.32028, -1.81809, -2.3159, -2.81371, -3.31153,
                      -3.80934, -4.30715, -4.80496, -5.30277, -5.82223, -6.32004,
                      -25.5974, -23.9153, -22.2332, -20.551, -18.8689, -17.1868,
                      -15.4316, -13.7494, -12.0673, -10.3852, -8.70311, -7.021,
                      -5.33888, -3.65677, -1.97466, -0.292541, 1.38957, 3.07169,
                      4.7538, 6.43591, 8.19116, 9.87327, 11.5554, 13.2375, 14.9196,
                      16.6017, 18.2838, 19.966, 21.6481, 23.3302, 25.0123, 26.6944,
                      28.3765, 30.0586, 31.8139, -47.986, -45.8388, -43.6916,
                      -41.5443, -37.1565, -35.0093, -32.862, -30.7148, -28.5676,
                      -26.4203, -24.2731, -22.1259, -19.9786, -17.8314, -15.6842,
                      -13.5369, -11.3897, -9.24245, -7.00185, -4.85462, -2.70738,
                      -0.560148, 1.58709, 3.73432, 5.88156, 8.02879, 10.176,
                      12.3233, 14.4705, 16.6177, 18.765, 20.9122, 23.1528, 25.3,
                      27.4473, 29.5945, 31.7417, 33.889, 36.0362, 38.1834, 40.3307,
                      42.4779, 44.6251, 46.7724, 48.9196, 24.1427, 22.5741, 20.9373,
                      19.437, 17.8684, 16.2998, 14.7312, 13.1626, 11.594, 10.0254,
                      8.45678, 6.81998, 5.31959, 3.68279, 2.04599, 0.477399, -1.023,
                      -2.59159, -4.16019, -5.66059, -7.29738, -8.86598, -10.4346,
                      -12.0032, -13.5718, -15.1404, -21.5511, -23.0515, -45.2569,
                      -40.6613, -38.3635, -36.0656, -33.7678, -31.47, -29.1722,
                      -26.8744, -24.5766, -22.2788, -19.981, -17.5832, -15.2854,
                      -12.9876, -10.6898, -8.392, -6.09419, -3.79638, -1.49857,
                      0.799239, 3.09705, 5.39486, 7.69267, 9.99048, 12.2883,
                      14.5861, 16.9838, 19.2816, 21.5794, 23.8773, 26.1751, 28.4729,
                      30.7707, 33.0685, 35.3663, 37.6641, 39.9619, 42.2597, 49.8478,
                      47.7895, 45.7311, 43.5833, 41.525, 39.4666, 37.4083, 33.2021,
                      31.2332, 29.1749, 27.027, 24.9687, 22.9103, 20.852, 18.7936,
                      16.7353, 14.6769, 12.6186, 10.5602, 8.50188, 6.44353, 4.38518,
                      2.32683, 0.26848, -1.87936, -3.93771, -5.99606, -8.05441,
                      -10.1128, -12.1711, -14.14, -16.1983, -18.3462, -20.4045,
                      -22.4629, -24.5212, -26.5796, -28.6379, -30.6962, -32.7546,
                      -34.8129, -36.8713, -38.9296, -40.988, -43.0463, -45.1047,
                      6.53648, 5.99538, 5.5625, 5.0214, 4.52359, 4.02578, 3.52797,
                      3.0518, 2.53234, 2.03453, 1.53672, 1.01727, 0.541099,
                      0.021644, -0.454523, -0.93069, -1.47179, -1.9696, -2.46741,
                      -2.94358, -3.44139, -3.96084, -4.45866, -4.95647, -5.45428,
                      -5.95209, -6.4499, -49.1077, -42.3712, -40.2234, -37.8803,
                      -35.7324, -33.487, -31.2415, -28.996, -26.6529, -24.4074,
                      -22.1619, -19.9164, -17.671, -15.5231, -13.18, -10.9345,
                      -8.68903, -6.44355, -4.19807, -1.95259, 0.292889, 2.53837,
                      4.78385, 7.02933, 9.37244, 11.5203, 13.8634, 16.1089, 18.3544,
                      20.5998, 22.8453, 25.0908, 27.3363, 29.5818, 31.8272, 34.1704,
                      36.4158, 38.5637, 41.0044, 43.2499, 45.3001, 47.5456,
                      49.8887};
double ew[59], ns[99], grid[11682], fit_eval[200], ci_eval[30];
double tmp, range = 98, coverage = .99;
int n = 323, p = 2, m, se_fit = FALSE;
int i, j, k;

main() {
    printf("\nloess(&galaxy):\n");
    loess_setup(direction, velocity, n, p, &galaxy);
    galaxy.model.span = 0.35;
    galaxy.model.normalize = FALSE;
    galaxy.model.family = "symmetric";
    loess(&galaxy);
    loess_summary(&galaxy);

    m = 5841;
    tmp = -29.0;
    for(i = 0; i < 59; i++)
        ew[i] = tmp++;
    tmp = -49.0;
    for(i = 0; i < 99; i++)
        ns[i] = tmp++;
    for(i = 0; i < 99; i++) {
        k = i * 59;
        for(j = 0; j < 59; j++) {
            grid[k + j] = ew[j];
            grid[m + k + j] = ns[i];
        }
    }
    predict(grid, m, &galaxy, &galaxy_contour, se_fit);

    m = 100;
    tmp = range / 99;
    for(i = 0; i < 100; i++) {
        fit_eval[i + 100] = -49 + tmp * i;
        fit_eval[i] = fit_eval[i + 100] / (-3.7);
    }
    predict(fit_eval, m, &galaxy, &spine_fit, se_fit);

    m = 15;
    se_fit = TRUE;
    tmp = range / 14;
    for(i = 0; i < m; i++) {
        ci_eval[i + m] = -49 + tmp * i;
        ci_eval[i] = fit_eval[i + 100] / (-3.7);
    }
    predict(ci_eval, m, &galaxy, &spine_se, se_fit);
    pointwise(&spine_se, m, coverage, &spine_ci);

    loess_free_mem(&galaxy);
    pred_free_mem(&galaxy_contour);
    pred_free_mem(&spine_fit);
    pred_free_mem(&spine_se);
}
69
pmsco/loess/gas.c
Normal file
@@ -0,0 +1,69 @@
/* sample program for the gas data using loess */

#include <stdio.h>
#include "loess.h"

struct loess_struct gas, gas_null;
struct pred_struct gas_pred;
struct ci_struct gas_ci;
struct anova_struct gas_anova;
double NOx[] = {4.818, 2.849, 3.275, 4.691, 4.255, 5.064, 2.118, 4.602,
                2.286, 0.97, 3.965, 5.344, 3.834, 1.99, 5.199, 5.283,
                3.752, 0.537, 1.64, 5.055, 4.937, 1.561};
double E[] = {0.831, 1.045, 1.021, 0.97, 0.825, 0.891, 0.71, 0.801,
              1.074, 1.148, 1, 0.928, 0.767, 0.701, 0.807, 0.902,
              0.997, 1.224, 1.089, 0.973, 0.98, 0.665};
double gas_fit_E[] = {0.665, 0.949, 1.224};
double newdata[] = {0.6650000, 0.7581667, 0.8513333, 0.9445000,
                    1.0376667, 1.1308333, 1.2240000};
double coverage = .99;
int i, n = 22, p = 1, m = 3, se_fit = FALSE;

main() {
    printf("\nloess(&gas):\n");
    loess_setup(E, NOx, n, p, &gas);
    gas.model.span = 2.0 / 3.0;
    loess(&gas);
    loess_summary(&gas);

    printf("\nloess(&gas_null):\n");
    loess_setup(E, NOx, n, p, &gas_null);
    gas_null.model.span = 1.0;
    loess(&gas_null);
    loess_summary(&gas_null);

    printf("\npredict(gas_fit_E, m, &gas, &gas_pred, %d):\n", se_fit);
    predict(gas_fit_E, m, &gas, &gas_pred, se_fit);
    for(i = 0; i < m; i++)
        printf("%g ", gas_pred.fit[i]);
    printf("\n");

    m = 7;
    se_fit = TRUE;
    predict(newdata, m, &gas, &gas_pred, se_fit);
    printf("\npointwise(&gas_pred, m, coverage, &gas_ci):\n");
    pointwise(&gas_pred, m, coverage, &gas_ci);
    for(i = 0; i < m; i++)
        printf("%g ", gas_ci.upper[i]);
    printf("\n");
    for(i = 0; i < m; i++)
        printf("%g ", gas_ci.fit[i]);
    printf("\n");
    for(i = 0; i < m; i++)
        printf("%g ", gas_ci.lower[i]);
    printf("\n");

    printf("\nanova(&gas_null, &gas, &gas_anova):\n");
    anova(&gas_null, &gas, &gas_anova);
    printf("%g %g %g %g\n", gas_anova.dfn, gas_anova.dfd,
           gas_anova.F_value, gas_anova.Pr_F);

    loess_free_mem(&gas);
    loess_free_mem(&gas_null);
    pred_free_mem(&gas_pred);
    pw_free_mem(&gas_ci);
}

327
pmsco/loess/loess.c
Normal file
@@ -0,0 +1,327 @@
#include "S.h"
#include "loess.h"

static char *surf_stat;

void
loess_setup(x, y, n, p, lo)
double *x, *y;
int n, p;
struct loess_struct *lo;
{
    int i, max_kd;

    max_kd = n > 200 ? n : 200;

    lo->in.y = (double *) malloc(n * sizeof(double));
    lo->in.x = (double *) malloc(n * p * sizeof(double));
    lo->in.weights = (double *) malloc(n * sizeof(double));
    for(i = 0; i < (n * p); i++)
        lo->in.x[i] = x[i];
    for(i = 0; i < n; i++) {
        lo->in.y[i] = y[i];
        lo->in.weights[i] = 1;
    }
    lo->in.n = n;
    lo->in.p = p;
    lo->model.span = 0.75;
    lo->model.degree = 2;
    lo->model.normalize = TRUE;
    for(i = 0; i < 8; i++)
        lo->model.parametric[i] = lo->model.drop_square[i] = FALSE;
    lo->model.family = "gaussian";
    lo->control.surface = "interpolate";
    lo->control.statistics = "approximate";
    lo->control.cell = 0.2;
    lo->control.trace_hat = "wait.to.decide";
    lo->control.iterations = 4;

    lo->out.fitted_values = (double *) malloc(n * sizeof(double));
    lo->out.fitted_residuals = (double *) malloc(n * sizeof(double));
    lo->out.pseudovalues = (double *) malloc(n * sizeof(double));
    lo->out.diagonal = (double *) malloc(n * sizeof(double));
    lo->out.robust = (double *) malloc(n * sizeof(double));
    lo->out.divisor = (double *) malloc(p * sizeof(double));

    lo->kd_tree.parameter = (int *) malloc(7 * sizeof(int));
    lo->kd_tree.a = (int *) malloc(max_kd * sizeof(int));
    lo->kd_tree.xi = (double *) malloc(max_kd * sizeof(double));
    lo->kd_tree.vert = (double *) malloc(p * 2 * sizeof(double));
    lo->kd_tree.vval = (double *) malloc((p + 1) * max_kd * sizeof(double));
}

void
loess(lo)
struct loess_struct *lo;
{
    int size_info[2], iterations;
    void loess_();

    size_info[0] = lo->in.p;
    size_info[1] = lo->in.n;

    iterations = (!strcmp(lo->model.family, "gaussian")) ? 0 :
        lo->control.iterations;
    if(!strcmp(lo->control.trace_hat, "wait.to.decide")) {
        if(!strcmp(lo->control.surface, "interpolate"))
            lo->control.trace_hat = (lo->in.n < 500) ? "exact" : "approximate";
        else
            lo->control.trace_hat = "exact";
    }
    loess_(lo->in.y, lo->in.x, size_info, lo->in.weights,
           &lo->model.span,
           &lo->model.degree,
           lo->model.parametric,
           lo->model.drop_square,
           &lo->model.normalize,
           &lo->control.statistics,
           &lo->control.surface,
           &lo->control.cell,
           &lo->control.trace_hat,
           &iterations,
           lo->out.fitted_values,
           lo->out.fitted_residuals,
           &lo->out.enp,
           &lo->out.s,
           &lo->out.one_delta,
           &lo->out.two_delta,
           lo->out.pseudovalues,
           &lo->out.trace_hat,
           lo->out.diagonal,
           lo->out.robust,
           lo->out.divisor,
           lo->kd_tree.parameter,
           lo->kd_tree.a,
           lo->kd_tree.xi,
           lo->kd_tree.vert,
           lo->kd_tree.vval);
}

void
loess_(y, x_, size_info, weights, span, degree, parametric, drop_square,
       normalize, statistics, surface, cell, trace_hat_in, iterations,
       fitted_values, fitted_residuals, enp, s, one_delta, two_delta,
       pseudovalues, trace_hat_out, diagonal, robust, divisor,
       parameter, a, xi, vert, vval)
double *y, *x_, *weights, *span, *cell, *pseudovalues,
       *fitted_values, *fitted_residuals, *enp, *s, *one_delta, *two_delta,
       *trace_hat_out, *diagonal, *robust, *divisor, *xi, *vert, *vval;
int *size_info, *degree, *parametric, *drop_square, *normalize,
    *iterations, *parameter, *a;
char **statistics, **surface, **trace_hat_in;
{
    double *x, *x_tmp, new_cell, trL, delta1, delta2, sum_squares = 0,
           *pseudo_resid, *temp, *xi_tmp, *vert_tmp, *vval_tmp,
           *diag_tmp, trL_tmp = 0, d1_tmp = 0, d2_tmp = 0, sum, mean;
    int i, j, k, p, N, D, sum_drop_sqr = 0, sum_parametric = 0,
        setLf, nonparametric = 0, *order_parametric,
        *order_drop_sqr, zero = 0, max_kd, *a_tmp, *param_tmp;
    int cut, comp();
    char *new_stat;
    void condition();

    D = size_info[0];
    N = size_info[1];
    max_kd = (N > 200 ? N : 200);
    *one_delta = *two_delta = *trace_hat_out = 0;

    x = (double *) malloc(D * N * sizeof(double));
    x_tmp = (double *) malloc(D * N * sizeof(double));
    temp = (double *) malloc(N * sizeof(double));
    a_tmp = (int *) malloc(max_kd * sizeof(int));
    xi_tmp = (double *) malloc(max_kd * sizeof(double));
    vert_tmp = (double *) malloc(D * 2 * sizeof(double));
    vval_tmp = (double *) malloc((D + 1) * max_kd * sizeof(double));
    diag_tmp = (double *) malloc(N * sizeof(double));
    param_tmp = (int *) malloc(N * sizeof(int));
    order_parametric = (int *) malloc(D * sizeof(int));
    order_drop_sqr = (int *) malloc(D * sizeof(int));
    if((*iterations) > 0)
        pseudo_resid = (double *) malloc(N * sizeof(double));

    new_cell = (*span) * (*cell);
    for(i = 0; i < N; i++)
        robust[i] = 1;
    for(i = 0; i < (N * D); i++)
        x_tmp[i] = x_[i];
    if((*normalize) && (D > 1)) {
        cut = ceil(0.100000000000000000001 * N);
        for(i = 0; i < D; i++) {
            k = i * N;
            for(j = 0; j < N; j++)
                temp[j] = x_[k + j];
            qsort(temp, N, sizeof(double), comp);
            sum = 0;
            for(j = cut; j <= (N - cut - 1); j++)
                sum = sum + temp[j];
            mean = sum / (N - 2 * cut);
            sum = 0;
            for(j = cut; j <= (N - cut - 1); j++) {
                temp[j] = temp[j] - mean;
                sum = sum + temp[j] * temp[j];
            }
            divisor[i] = sqrt(sum / (N - 2 * cut - 1));
            for(j = 0; j < N; j++) {
                p = k + j;
                x_tmp[p] = x_[p] / divisor[i];
            }
        }
    }
    else
        for(i = 0; i < D; i++) divisor[i] = 1;
    j = D - 1;
    for(i = 0; i < D; i++) {
        sum_drop_sqr = sum_drop_sqr + drop_square[i];
        sum_parametric = sum_parametric + parametric[i];
        if(parametric[i])
            order_parametric[j--] = i;
        else
            order_parametric[nonparametric++] = i;
    }
    for(i = 0; i < D; i++) {
        order_drop_sqr[i] = 2 - drop_square[order_parametric[i]];
        k = i * N;
        p = order_parametric[i] * N;
        for(j = 0; j < N; j++)
            x[k + j] = x_tmp[p + j];
    }
    if((*degree) == 1 && sum_drop_sqr) {
        fprintf(stderr, "Specified the square of a factor predictor to be dropped when degree = 1");
        exit(1);
    }
    if(D == 1 && sum_drop_sqr) {
        fprintf(stderr, "Specified the square of a predictor to be dropped with only one numeric predictor");
        exit(1);
    }
    if(sum_parametric == D) {
        fprintf(stderr, "Specified parametric for all predictors");
        exit(1);
    }
    for(j = 0; j <= (*iterations); j++) {
        new_stat = j ? "none" : *statistics;
        for(i = 0; i < N; i++)
            robust[i] = weights[i] * robust[i];
        condition(surface, new_stat, trace_hat_in);
        setLf = !strcmp(surf_stat, "interpolate/exact");
        loess_raw(y, x, weights, robust, &D, &N, span, degree,
                  &nonparametric, order_drop_sqr, &sum_drop_sqr,
                  &new_cell, &surf_stat, fitted_values, parameter, a,
                  xi, vert, vval, diagonal, &trL, &delta1, &delta2,
                  &setLf);
        if(j == 0) {
            *trace_hat_out = trL;
            *one_delta = delta1;
            *two_delta = delta2;
        }
        for(i = 0; i < N; i++)
            fitted_residuals[i] = y[i] - fitted_values[i];
        if(j < (*iterations))
            F77_SUB(lowesw)(fitted_residuals, &N, robust, temp);
    }
    if((*iterations) > 0) {
        F77_SUB(lowesp)(&N, y, fitted_values, weights, robust, temp, pseudovalues);

        loess_raw(pseudovalues, x, weights, weights, &D, &N, span,
                  degree, &nonparametric, order_drop_sqr, &sum_drop_sqr,
                  &new_cell, &surf_stat, temp, param_tmp, a_tmp, xi_tmp,
                  vert_tmp, vval_tmp, diag_tmp, &trL_tmp, &d1_tmp, &d2_tmp, &zero);
        for(i = 0; i < N; i++)
            pseudo_resid[i] = pseudovalues[i] - temp[i];
    }
    if((*iterations) == 0)
        for(i = 0; i < N; i++)
            sum_squares = sum_squares + weights[i] *
                          fitted_residuals[i] * fitted_residuals[i];
    else
        for(i = 0; i < N; i++)
            sum_squares = sum_squares + weights[i] *
                          pseudo_resid[i] * pseudo_resid[i];
    *enp = (*one_delta) + 2 * (*trace_hat_out) - N;
    *s = sqrt(sum_squares / (*one_delta));

    free(x);
    free(x_tmp);
    free(temp);
    free(xi_tmp);
    free(vert_tmp);
    free(vval_tmp);
    free(diag_tmp);
    free(a_tmp);
    free(param_tmp);
    free(order_parametric);
    free(order_drop_sqr);
    if((*iterations) > 0)
        free(pseudo_resid);
}

void
loess_free_mem(lo)
struct loess_struct *lo;
{
    free(lo->in.x);
    free(lo->in.y);
    free(lo->in.weights);
    free(lo->out.fitted_values);
    free(lo->out.fitted_residuals);
    free(lo->out.pseudovalues);
    free(lo->out.diagonal);
    free(lo->out.robust);
    free(lo->out.divisor);
    free(lo->kd_tree.parameter);
    free(lo->kd_tree.a);
    free(lo->kd_tree.xi);
    free(lo->kd_tree.vert);
    free(lo->kd_tree.vval);
}

void
loess_summary(lo)
struct loess_struct *lo;
{
    printf("Number of Observations: %d\n", lo->in.n);
    printf("Equivalent Number of Parameters: %.1f\n", lo->out.enp);
    if(!strcmp(lo->model.family, "gaussian"))
        printf("Residual Standard Error: ");
    else
        printf("Residual Scale Estimate: ");
    printf("%.4f\n", lo->out.s);
}

void
condition(surface, new_stat, trace_hat_in)
char **surface, *new_stat, **trace_hat_in;
{
    if(!strcmp(*surface, "interpolate")) {
        if(!strcmp(new_stat, "none"))
            surf_stat = "interpolate/none";
        else if(!strcmp(new_stat, "exact"))
            surf_stat = "interpolate/exact";
        else if(!strcmp(new_stat, "approximate"))
        {
            if(!strcmp(*trace_hat_in, "approximate"))
                surf_stat = "interpolate/2.approx";
            else if(!strcmp(*trace_hat_in, "exact"))
                surf_stat = "interpolate/1.approx";
        }
    }
    else if(!strcmp(*surface, "direct")) {
        if(!strcmp(new_stat, "none"))
            surf_stat = "direct/none";
        else if(!strcmp(new_stat, "exact"))
            surf_stat = "direct/exact";
        else if(!strcmp(new_stat, "approximate"))
            surf_stat = "direct/approximate";
    }
}

int
comp(d1, d2)
double *d1, *d2;
{
    if(*d1 < *d2)
        return(-1);
    else if(*d1 == *d2)
        return(0);
    else
        return(1);
}
70
pmsco/loess/loess.h
Normal file
@@ -0,0 +1,70 @@
/* for the meaning of these fields, see struct.m */
/* longs are used here so that the codes can be called from S */

#define TRUE 1
#define FALSE 0

extern struct loess_struct {
    struct {
        int n;
        int p;
        double *y;
        double *x;
        double *weights;
    } in;
    struct {
        double span;
        int degree;
        int normalize;
        int parametric[8];
        int drop_square[8];
        char *family;
    } model;
    struct {
        char *surface;
        char *statistics;
        double cell;
        char *trace_hat;
        int iterations;
    } control;
    struct {
        int *parameter;
        int *a;
        double *xi;
        double *vert;
        double *vval;
    } kd_tree;
    struct {
        double *fitted_values;
        double *fitted_residuals;
        double enp;
        double s;
        double one_delta;
        double two_delta;
        double *pseudovalues;
        double trace_hat;
        double *diagonal;
        double *robust;
        double *divisor;
    } out;
} loess_struct;

extern struct pred_struct {
    double *fit;
    double *se_fit;
    double residual_scale;
    double df;
} pred_struct;

extern struct anova_struct {
    double dfn;
    double dfd;
    double F_value;
    double Pr_F;
} anova_struct;

extern struct ci_struct {
    double *fit;
    double *upper;
    double *lower;
} ci_struct;
284
pmsco/loess/loess.i
Normal file
@@ -0,0 +1,284 @@
%module loess
%include "typemaps.i"

%{
#define SWIG_FILE_WITH_INIT
#include <errno.h>
#define EARRLEN 1000
#include "loess.h"

extern void loess(struct loess_struct *lo);

extern void loess_summary(struct loess_struct *lo);

// not implemented
// extern void predict(double *eval, int m, struct loess_struct *lo, struct pred_struct *pre, int se);
%}

%include "numpy.i"

%init %{
import_array();
%}

%apply (double *IN_ARRAY1, int DIM1) {(double *v, int n)};
%apply (int *IN_ARRAY1, int DIM1) {(int *v, int n)};
%apply (double **ARGOUTVIEWM_ARRAY1, int *DIM1) {(double **w, int *n)};
%apply (int **ARGOUTVIEWM_ARRAY1, int *DIM1) {(int **w, int *n)};

%include "loess.h"

extern void loess(struct loess_struct *lo);

extern void loess_summary(struct loess_struct *lo);

// not implemented
// extern void predict(double *eval, int m, struct loess_struct *lo, struct pred_struct *pre, int se);

%exception {
    errno = 0;
    $action

    if (errno != 0) {
        switch(errno) {
            case ENOMEM:
                PyErr_Format(PyExc_MemoryError, "memory allocation failed.");
                break;
            case EARRLEN:
                PyErr_Format(PyExc_ValueError, "unexpected array length.");
                break;
            default:
                PyErr_Format(PyExc_Exception, "unknown exception.");
        }
        SWIG_fail;
    }
}

%extend loess_struct {
    //// constructor of a loess_struct
    //
    // @param n: number of data points.
    //
    // @param p: number of factors (independent variables). maximum 8.

    loess_struct(int n, int p) {
        struct loess_struct *lo;
        lo = (struct loess_struct *) malloc(sizeof(loess_struct));

        int i, max_kd;
        max_kd = n > 200 ? n : 200;

        lo->in.y = (double *) malloc(n * sizeof(double));
        lo->in.x = (double *) malloc(n * p * sizeof(double));
        lo->in.weights = (double *) malloc(n * sizeof(double));
        for(i = 0; i < (n * p); i++)
            lo->in.x[i] = 0.0;
        for(i = 0; i < n; i++) {
            lo->in.y[i] = 0.0;
            lo->in.weights[i] = 1.0;
        }
        lo->in.n = n;
        lo->in.p = p;
        lo->model.span = 0.75;
        lo->model.degree = 2;
        lo->model.normalize = TRUE;
        for(i = 0; i < 8; i++)
            lo->model.parametric[i] = lo->model.drop_square[i] = FALSE;
        lo->model.family = "gaussian";
        lo->control.surface = "interpolate";
        lo->control.statistics = "approximate";
        lo->control.cell = 0.2;
        lo->control.trace_hat = "wait.to.decide";
        lo->control.iterations = 4;

        lo->out.fitted_values = (double *) malloc(n * sizeof(double));
        lo->out.fitted_residuals = (double *) malloc(n * sizeof(double));
        lo->out.pseudovalues = (double *) malloc(n * sizeof(double));
        lo->out.diagonal = (double *) malloc(n * sizeof(double));
        lo->out.robust = (double *) malloc(n * sizeof(double));
        lo->out.divisor = (double *) malloc(p * sizeof(double));

        lo->kd_tree.parameter = (int *) malloc(7 * sizeof(int));
        lo->kd_tree.a = (int *) malloc(max_kd * sizeof(int));
        lo->kd_tree.xi = (double *) malloc(max_kd * sizeof(double));
        lo->kd_tree.vert = (double *) malloc(p * 2 * sizeof(double));
        lo->kd_tree.vval = (double *) malloc((p + 1) * max_kd * sizeof(double));

        return lo;
    }

    ~loess_struct() {
        free($self->in.x);
        free($self->in.y);
        free($self->in.weights);
        free($self->out.fitted_values);
        free($self->out.fitted_residuals);
        free($self->out.pseudovalues);
        free($self->out.diagonal);
        free($self->out.robust);
        free($self->out.divisor);
        free($self->kd_tree.parameter);
        free($self->kd_tree.a);
        free($self->kd_tree.xi);
        free($self->kd_tree.vert);
        free($self->kd_tree.vval);
        free($self);
    }

    void set_x(double *v, int n) {
        int n_exp = $self->in.n * $self->in.p;
        if (n == n_exp) {
            int i;
            for(i = 0; i < n; i++)
                $self->in.x[i] = v[i];
        } else {
            errno = EARRLEN;
        }
    }

    void set_y(double *v, int n) {
        int n_exp = $self->in.n;
        if (n == n_exp) {
            int i;
            for(i = 0; i < n; i++)
                $self->in.y[i] = v[i];
        } else {
            errno = EARRLEN;
        }
    }

    void set_parametric(int *v, int n) {
        int n_exp = $self->in.p;
        if (n == n_exp) {
            int i;
            for(i = 0; i < n; i++)
                $self->model.parametric[i] = v[i];
        } else {
            errno = EARRLEN;
        }
    }

    void set_drop_square(int *v, int n) {
        int n_exp = $self->in.p;
        if (n == n_exp) {
            int i;
            for(i = 0; i < n; i++)
                $self->model.drop_square[i] = v[i];
        } else {
            errno = EARRLEN;
        }
    }

    void get_x(double **w, int *n) {
        int ni = $self->in.n * $self->in.p;

        double *temp;
        temp = (double *)malloc(ni * sizeof(double));
        if (temp == NULL)
            errno = ENOMEM;

        int i;
        for(i = 0; i < ni; i++)
            temp[i] = $self->in.x[i];

        *w = temp;
        *n = ni;
    }

    void get_y(double **w, int *n) {
        int ni = $self->in.n;

        double *temp;
        temp = (double *)malloc(ni * sizeof(double));
        if (temp == NULL)
            errno = ENOMEM;

        int i;
        for(i = 0; i < ni; i++)
            temp[i] = $self->in.y[i];

        *w = temp;
        *n = ni;
    }

    void get_weights(double **w, int *n) {
        int ni = $self->in.n;

        double *temp;
        temp = (double *)malloc(ni * sizeof(double));
        if (temp == NULL)
            errno = ENOMEM;

        int i;
        for(i = 0; i < ni; i++)
            temp[i] = $self->in.weights[i];

        *w = temp;
        *n = ni;
    }

    void get_fitted_values(double **w, int *n) {
        int ni = $self->in.n;

        double *temp;
        temp = (double *)malloc(ni * sizeof(double));
        if (temp == NULL)
            errno = ENOMEM;

        int i;
        for(i = 0; i < ni; i++)
            temp[i] = $self->out.fitted_values[i];

        *w = temp;
        *n = ni;
    }

    void get_fitted_residuals(double **w, int *n) {
        int ni = $self->in.n;

        double *temp;
        temp = (double *)malloc(ni * sizeof(double));
        if (temp == NULL)
            errno = ENOMEM;

        int i;
        for(i = 0; i < ni; i++)
            temp[i] = $self->out.fitted_residuals[i];

        *w = temp;
        *n = ni;
    }

    void get_parametric(int **w, int *n) {
        int ni = $self->in.p;

        int *temp;
        temp = (int *)malloc(ni * sizeof(int));
        if (temp == NULL)
            errno = ENOMEM;

        int i;
        for(i = 0; i < ni; i++)
            temp[i] = $self->model.parametric[i];

        *w = temp;
        *n = ni;
    }

    void get_drop_square(int **w, int *n) {
        int ni = $self->in.p;

        int *temp;
        temp = (int *)malloc(ni * sizeof(int));
        if (temp == NULL)
            errno = ENOMEM;

        int i;
        for(i = 0; i < ni; i++)
            temp[i] = $self->model.drop_square[i];

        *w = temp;
        *n = ni;
    }

};
75
pmsco/loess/loess.m
Normal file
@@ -0,0 +1,75 @@
NAME

    loess_setup, loess, loess_summary, loess_free_mem, anova

SYNOPSIS

    #include "loess.h"
    double *x, *y;
    long n, p;
    struct loess_struct *lo, *lo2;
    struct anova_struct *aov;

    void loess_setup(x, y, n, p, lo)

    void loess(lo)

    void loess_summary(lo)

    void loess_free_mem(lo)

    void anova(lo, lo2, aov);

PARAMETERS

    x        predictors vector (of length n * p)
             The j-th coordinate of the i-th point is in x[i+n*j],
             where 0<=j<p, 0<=i<n.

    y        response vector (of length n).

    n        number of observations.

    p        number of variables/predictors.

    lo       copy of data; controls; k-d tree and coefficients.

    aov      results of the F-test in the analysis of variance.

DESCRIPTION

    loess_setup() sets up all default values in loess_struct's in,
    model, and control structures; it also allocates memory for the
    kd_tree and out structures based on n and p. Caller can then
    override any of these parameters by explicitly redefining them
    before the call to loess() (see sample.c). loess_setup()
    has the side-effect of copying x, y, n, and p into the in
    structure for ease of arguments-passing in subsequent calls to
    other loess and predict routines.

    loess() takes this structure, and does the actual loess
    computation. It stores the results in the out structure.

    loess_summary() is a simple utility routine that summarizes the
    results of the loess computation. Since it takes in the whole
    loess structure as its argument, it has the potential of printing
    out any parameter of interest with only a slight modification to
    the code.

    loess_free_mem() frees up all dynamically allocated memory
    used by the loess structure.

    anova() performs an analysis of variance on two loess models, and
    stores the results of the F-test in the anova_struct structure.

    loess_struct and anova_struct are defined in loess.h and documented
    in struct.m. Although the internal arrays are allocated by
    loess_setup(), the struct arguments (lo, lo2, aov) should be
    allocated by the caller. Thus a typical call would be
        struct loess_struct lo;
        loess_setup(x,y,n,p,&lo);

SEE ALSO

    predict, pointwise, pred_free_mem, pw_free_mem

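A minimal C sketch of the calling sequence described above, modeled on the bundled gas.c sample; the data values below are placeholders, not real measurements:

#include "loess.h"

int main(void)
{
    /* placeholder data: n = 8 observations of p = 1 predictor */
    double x[] = {0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8};
    double y[] = {1.0, 1.9, 3.1, 3.9, 5.2, 5.8, 7.1, 8.0};
    struct loess_struct lo;

    loess_setup(x, y, 8, 1, &lo);   /* copies data, fills in defaults   */
    lo.model.span = 0.75;           /* override defaults before fitting */
    loess(&lo);                     /* do the fit                       */
    loess_summary(&lo);             /* print ENP and residual scale     */
    loess_free_mem(&lo);
    return 0;
}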
347
pmsco/loess/loessc.c
Normal file
@@ -0,0 +1,347 @@
#include "S.h"

#define min(x,y) ((x) < (y) ? (x) : (y))
#define max(x,y) ((x) > (y) ? (x) : (y))
#define GAUSSIAN 1
#define SYMMETRIC 0

static int *iv, liv, lv, tau;
static double *v;

loess_raw(y, x, weights, robust, d, n, span, degree, nonparametric,
          drop_square, sum_drop_sqr, cell, surf_stat, surface, parameter, a,
          xi, vert, vval, diagonal, trL, one_delta, two_delta, setLf)
double *y, *x, *weights, *robust, *span, *cell, *surface, *xi, *vert,
       *vval, *diagonal, *trL, *one_delta, *two_delta;
int *d, *n, *parameter, *a, *degree, *nonparametric, *drop_square,
    *sum_drop_sqr, *setLf;
char **surf_stat;
{
    int zero = 0, one = 1, two = 2, nsing, i, k;
    double *hat_matrix, *LL;

    *trL = 0;
    loess_workspace(d, n, span, degree, nonparametric, drop_square,
                    sum_drop_sqr, setLf);
    v[1] = *cell;
    if(!strcmp(*surf_stat, "interpolate/none")) {
        F77_SUB(lowesb)(x, y, robust, &zero, &zero, iv, &liv, &lv, v);
        F77_SUB(lowese)(iv, &liv, &lv, v, n, x, surface);
        loess_prune(parameter, a, xi, vert, vval);
    }
    else if (!strcmp(*surf_stat, "direct/none")) {
        F77_SUB(lowesf)(x, y, robust, iv, &liv, &lv, v, n, x,
                        &zero, &zero, surface);
    }
    else if (!strcmp(*surf_stat, "interpolate/1.approx")) {
        F77_SUB(lowesb)(x, y, weights, diagonal, &one, iv, &liv, &lv, v);
        F77_SUB(lowese)(iv, &liv, &lv, v, n, x, surface);
        nsing = iv[29];
        for(i = 0; i < (*n); i++) *trL = *trL + diagonal[i];
        F77_SUB(lowesa)(trL, n, d, &tau, &nsing, one_delta, two_delta);
        loess_prune(parameter, a, xi, vert, vval);
    }
    else if (!strcmp(*surf_stat, "interpolate/2.approx")) {
        F77_SUB(lowesb)(x, y, robust, &zero, &zero, iv, &liv, &lv, v);
        F77_SUB(lowese)(iv, &liv, &lv, v, n, x, surface);
        nsing = iv[29];
        F77_SUB(ehg196)(&tau, d, span, trL);
        F77_SUB(lowesa)(trL, n, d, &tau, &nsing, one_delta, two_delta);
        loess_prune(parameter, a, xi, vert, vval);
    }
    else if (!strcmp(*surf_stat, "direct/approximate")) {
        F77_SUB(lowesf)(x, y, weights, iv, &liv, &lv, v, n, x,
                        diagonal, &one, surface);
        nsing = iv[29];
        for(i = 0; i < (*n); i++) *trL = *trL + diagonal[i];
        F77_SUB(lowesa)(trL, n, d, &tau, &nsing, one_delta, two_delta);
    }
    else if (!strcmp(*surf_stat, "interpolate/exact")) {
        hat_matrix = Calloc((*n)*(*n), double);
        LL = Calloc((*n)*(*n), double);
        F77_SUB(lowesb)(x, y, weights, diagonal, &one, iv, &liv, &lv, v);
        F77_SUB(lowesl)(iv, &liv, &lv, v, n, x, hat_matrix);
        F77_SUB(lowesc)(n, hat_matrix, LL, trL, one_delta, two_delta);
        F77_SUB(lowese)(iv, &liv, &lv, v, n, x, surface);
        loess_prune(parameter, a, xi, vert, vval);
        Free(hat_matrix);
        Free(LL);
    }
    else if (!strcmp(*surf_stat, "direct/exact")) {
        hat_matrix = Calloc((*n)*(*n), double);
        LL = Calloc((*n)*(*n), double);
        F77_SUB(lowesf)(x, y, weights, iv, &liv, &lv, v, n, x,
                        hat_matrix, &two, surface);
        F77_SUB(lowesc)(n, hat_matrix, LL, trL, one_delta, two_delta);
        k = (*n) + 1;
        for(i = 0; i < (*n); i++)
            diagonal[i] = hat_matrix[i * k];
        Free(hat_matrix);
        Free(LL);
    }
    loess_free();
}

loess_dfit(y, x, x_evaluate, weights, span, degree, nonparametric,
           drop_square, sum_drop_sqr, d, n, m, fit)
double *y, *x, *x_evaluate, *weights, *span, *fit;
int *degree, *nonparametric, *drop_square, *sum_drop_sqr, *d, *n, *m;
{
    int zero = 0, one = 1;

    loess_workspace(d, n, span, degree, nonparametric, drop_square,
                    sum_drop_sqr, &zero);
    F77_SUB(lowesf)(x, y, weights, iv, &liv, &lv, v, m, x_evaluate,
                    &zero, &zero, fit);
    loess_free();
}

loess_dfitse(y, x, x_evaluate, weights, robust, family, span, degree,
             nonparametric, drop_square, sum_drop_sqr, d, n, m, fit, L)
double *y, *x, *x_evaluate, *weights, *robust, *span, *fit, *L;
int *family, *degree, *nonparametric, *drop_square, *sum_drop_sqr,
    *d, *n, *m;
{
    int zero = 0, one = 1, two = 2;

    loess_workspace(d, n, span, degree, nonparametric, drop_square,
                    sum_drop_sqr, &zero);
    if(*family == GAUSSIAN)
        F77_SUB(lowesf)(x, y, weights, iv, &liv, &lv, v, m,
                        x_evaluate, L, &two, fit);
    else if(*family == SYMMETRIC)
    {
        F77_SUB(lowesf)(x, y, weights, iv, &liv, &lv, v, m,
                        x_evaluate, L, &two, fit);
        F77_SUB(lowesf)(x, y, robust, iv, &liv, &lv, v, m,
                        x_evaluate, &zero, &zero, fit);
    }
    loess_free();
}
loess_ifit(parameter, a, xi, vert, vval, m, x_evaluate, fit)
double *xi, *vert, *vval, *x_evaluate, *fit;
int *parameter, *a, *m;
{
    loess_grow(parameter, a, xi, vert, vval);
    F77_SUB(lowese)(iv, &liv, &lv, v, m, x_evaluate, fit);
    loess_free();
}

loess_ise(y, x, x_evaluate, weights, span, degree, nonparametric,
          drop_square, sum_drop_sqr, cell, d, n, m, fit, L)
double *y, *x, *x_evaluate, *weights, *span, *cell, *fit, *L;
int *degree, *nonparametric, *drop_square, *sum_drop_sqr, *d, *n, *m;
{
    int zero = 0, one = 1;

    loess_workspace(d, n, span, degree, nonparametric, drop_square,
                    sum_drop_sqr, &one);
    v[1] = *cell;
    F77_SUB(lowesb)(x, y, weights, &zero, &zero, iv, &liv, &lv, v);
    F77_SUB(lowesl)(iv, &liv, &lv, v, m, x_evaluate, L);
    loess_free();
}

loess_workspace(d, n, span, degree, nonparametric, drop_square,
                sum_drop_sqr, setLf)
int *d, *n, *degree, *nonparametric, *drop_square, *sum_drop_sqr,
    *setLf;
double *span;
{
    int D, N, tau0, nvmax, nf, version = 106, i;

    D = *d;
    N = *n;
    nvmax = max(200, N);
    nf = min(N, floor(N * (*span)));
    tau0 = ((*degree) > 1) ? ((D + 2) * (D + 1) * 0.5) : (D + 1);
    tau = tau0 - (*sum_drop_sqr);
    lv = 50 + (3 * D + 3) * nvmax + N + (tau0 + 2) * nf;
    liv = 50 + ((int)pow((double)2, (double)D) + 4) * nvmax + 2 * N;
    if(*setLf) {
        lv = lv + (D + 1) * nf * nvmax;
        liv = liv + nf * nvmax;
    }
    iv = Calloc(liv, int);
    v = Calloc(lv, double);

    F77_SUB(lowesd)(&version, iv, &liv, &lv, v, d, n, span, degree,
                    &nvmax, setLf);
    iv[32] = *nonparametric;
    for(i = 0; i < D; i++)
        iv[i + 40] = drop_square[i];
}
loess_prune(parameter, a, xi, vert, vval)
|
||||
double *xi, *vert, *vval;
|
||||
int *parameter, *a;
|
||||
{
|
||||
int d, vc, a1, v1, xi1, vv1, nc, nv, nvmax, i, j, k;
|
||||
|
||||
d = iv[1];
|
||||
vc = iv[3] - 1;
|
||||
nc = iv[4];
|
||||
nv = iv[5];
|
||||
a1 = iv[6] - 1;
|
||||
v1 = iv[10] - 1;
|
||||
xi1 = iv[11] - 1;
|
||||
vv1 = iv[12] - 1;
|
||||
nvmax = iv[13];
|
||||
|
||||
for(i = 0; i < 5; i++)
|
||||
parameter[i] = iv[i + 1];
|
||||
parameter[5] = iv[21] - 1;
|
||||
parameter[6] = iv[14] - 1;
|
||||
|
||||
for(i = 0; i < d; i++){
|
||||
k = nvmax * i;
|
||||
vert[i] = v[v1 + k];
|
||||
vert[i + d] = v[v1 + vc + k];
|
||||
}
|
||||
for(i = 0; i < nc; i++) {
|
||||
xi[i] = v[xi1 + i];
|
||||
a[i] = iv[a1 + i];
|
||||
}
|
||||
k = (d + 1) * nv;
|
||||
for(i = 0; i < k; i++)
|
||||
vval[i] = v[vv1 + i];
|
||||
}
|
||||
|
||||
loess_grow(parameter, a, xi, vert, vval)
|
||||
double *xi, *vert, *vval;
|
||||
int *parameter, *a;
|
||||
{
|
||||
int d, vc, nc, nv, a1, v1, xi1, vv1, i, j, k;
|
||||
|
||||
d = parameter[0];
|
||||
vc = parameter[2];
|
||||
nc = parameter[3];
|
||||
nv = parameter[4];
|
||||
liv = parameter[5];
|
||||
lv = parameter[6];
|
||||
iv = Calloc(liv, int);
|
||||
v = Calloc(lv, double);
|
||||
|
||||
iv[1] = d;
|
||||
iv[2] = parameter[1];
|
||||
iv[3] = vc;
|
||||
iv[5] = iv[13] = nv;
|
||||
iv[4] = iv[16] = nc;
|
||||
iv[6] = 50;
|
||||
iv[7] = iv[6] + nc;
|
||||
iv[8] = iv[7] + vc * nc;
|
||||
iv[9] = iv[8] + nc;
|
||||
iv[10] = 50;
|
||||
iv[12] = iv[10] + nv * d;
|
||||
iv[11] = iv[12] + (d + 1) * nv;
|
||||
iv[27] = 173;
|
||||
|
||||
v1 = iv[10] - 1;
|
||||
xi1 = iv[11] - 1;
|
||||
a1 = iv[6] - 1;
|
||||
vv1 = iv[12] - 1;
|
||||
|
||||
for(i = 0; i < d; i++) {
|
||||
k = nv * i;
|
||||
v[v1 + k] = vert[i];
|
||||
v[v1 + vc - 1 + k] = vert[i + d];
|
||||
}
|
||||
for(i = 0; i < nc; i++) {
|
||||
v[xi1 + i] = xi[i];
|
||||
iv[a1 + i] = a[i];
|
||||
}
|
||||
k = (d + 1) * nv;
|
||||
for(i = 0; i < k; i++)
|
||||
v[vv1 + i] = vval[i];
|
||||
|
||||
F77_SUB(ehg169)(&d, &vc, &nc, &nc, &nv, &nv, v+v1, iv+a1,
|
||||
v+xi1, iv+iv[7]-1, iv+iv[8]-1, iv+iv[9]-1);
|
||||
}
|
||||
|
||||
loess_free()
|
||||
{
|
||||
Free(v);
|
||||
Free(iv);
|
||||
}
|
||||
|
||||
/* begin ehg's FORTRAN-callable C-codes */
|
||||
|
||||
void
|
||||
F77_SUB(ehg182)(i)
|
||||
int *i;
|
||||
{
|
||||
char *mess, mess2[50];
|
||||
switch(*i){
|
||||
case 100: mess="wrong version number in lowesd. Probably typo in caller."; break;
|
||||
case 101: mess="d>dMAX in ehg131. Need to recompile with increased dimensions."; break;
|
||||
case 102: mess="liv too small. (Discovered by lowesd)"; break;
|
||||
case 103: mess="lv too small. (Discovered by lowesd)"; break;
|
||||
case 104: mess="span too small. fewer data values than degrees of freedom."; break;
|
||||
case 105: mess="k>d2MAX in ehg136. Need to recompile with increased dimensions."; break;
|
||||
case 106: mess="lwork too small"; break;
|
||||
case 107: mess="invalid value for kernel"; break;
|
||||
case 108: mess="invalid value for ideg"; break;
|
||||
case 109: mess="lowstt only applies when kernel=1."; break;
|
||||
case 110: mess="not enough extra workspace for robustness calculation"; break;
|
||||
case 120: mess="zero-width neighborhood. make span bigger"; break;
|
||||
case 121: mess="all data on boundary of neighborhood. make span bigger"; break;
|
||||
case 122: mess="extrapolation not allowed with blending"; break;
|
||||
case 123: mess="ihat=1 (diag L) in l2fit only makes sense if z=x (eval=data)."; break;
|
||||
case 171: mess="lowesd must be called first."; break;
|
||||
case 172: mess="lowesf must not come between lowesb and lowese, lowesr, or lowesl."; break;
|
||||
case 173: mess="lowesb must come before lowese, lowesr, or lowesl."; break;
|
||||
case 174: mess="lowesb need not be called twice."; break;
|
||||
case 175: mess="need setLf=.true. for lowesl."; break;
|
||||
case 180: mess="nv>nvmax in cpvert."; break;
|
||||
case 181: mess="nt>20 in eval."; break;
|
||||
case 182: mess="svddc failed in l2fit."; break;
|
||||
case 183: mess="didnt find edge in vleaf."; break;
|
||||
case 184: mess="zero-width cell found in vleaf."; break;
|
||||
case 185: mess="trouble descending to leaf in vleaf."; break;
|
||||
case 186: mess="insufficient workspace for lowesf."; break;
|
||||
case 187: mess="insufficient stack space"; break;
|
||||
case 188: mess="lv too small for computing explicit L"; break;
|
||||
case 191: mess="computed trace L was negative; something is wrong!"; break;
|
||||
case 192: mess="computed delta was negative; something is wrong!"; break;
|
||||
case 193: mess="workspace in loread appears to be corrupted"; break;
|
||||
case 194: mess="trouble in l2fit/l2tr"; break;
|
||||
case 195: mess="only constant, linear, or quadratic local models allowed"; break;
|
||||
case 196: mess="degree must be at least 1 for vertex influence matrix"; break;
|
||||
case 999: mess="not yet implemented"; break;
|
||||
default: sprintf(mess=mess2,"Assert failed; error code %d\n",*i); break;
|
||||
}
|
||||
Recover(mess,NULL_ENTRY); /* in /usr/s/current/src/qpe/debug.c */
|
||||
}
|
||||
|
||||
void
|
||||
F77_SUB(ehg183)(s,i,n,inc)
|
||||
char *s;
|
||||
int *i, *n, *inc;
|
||||
{
|
||||
char mess[4000], num[20];
|
||||
int j;
|
||||
strcpy(mess,s);
|
||||
for (j=0; j<*n; j++) {
|
||||
sprintf(num," %d",i[j * *inc]);
|
||||
strcat(mess,num);
|
||||
}
|
||||
strcat(mess,"\n");
|
||||
Warning(mess,NULL_ENTRY);
|
||||
}
|
||||
|
||||
void
|
||||
F77_SUB(ehg184)(s,x,n,inc)
|
||||
char *s;
|
||||
double *x;
|
||||
int *n, *inc;
|
||||
{
|
||||
char mess[4000], num[30];
|
||||
int j;
|
||||
strcpy(mess,s);
|
||||
for (j=0; j<*n; j++) {
|
||||
sprintf(num," %.5g",x[j * *inc]);
|
||||
strcat(mess,num);
|
||||
}
|
||||
strcat(mess,"\n");
|
||||
Warning(mess,NULL_ENTRY);
|
||||
}
|
||||
2198
pmsco/loess/loessf.f
Normal file
File diff suppressed because it is too large
201
pmsco/loess/loessf.m
Normal file
@@ -0,0 +1,201 @@
***************************************************************
*  LOESS smoothing scattered data in one or more variables   *
*             documentation of Fortran routines              *
*              Cleveland, Devlin, Grosse, Shyu               *
***************************************************************

1. The typical program would call lowesd, set tolerances in iv,v if
   desired, then call lowesb and lowese (a minimal C sketch of this
   sequence follows below).
2. To save the k-d tree, call lowesd, lowesb and then losave; subsequent
   programs would call lohead, set liv and lv, then call loread and lowese.
3. For statistics, get diagL and then call lowesa or get the full hat
   matrix and call lowesc. Robustness iterations can take advantage of
   lowesw and lowesp.
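For concreteness, here is a hedged sketch of calling sequence 1, driven
from C through the same F77_SUB bindings that loessc.c uses; the
workspace sizes follow the liv/lv formulas given below. The helper name
smooth_once and the fixed choices (d=1 predictor, tdeg=2, setLf=0,
evaluation at the data points themselves) are illustrative assumptions,
not part of the original distribution:

    #include <math.h>
    #include "S.h"                        /* Calloc, Free, F77_SUB */

    /* smooth y(x) at the n data points; s receives the fitted values */
    static void smooth_once(double *x, double *y, double *w,
                            int n, double span, double *s)
    {
        int version = 106, d = 1, tdeg = 2, setLf = 0, zero = 0;
        int nvmax = n > 200 ? n : 200;        /* nvmax = max(200,n)     */
        int nf = (int)floor(n * span);        /* nf = min(n,floor(n*f)) */
        int tau0 = (d + 2) * (d + 1) / 2;     /* quadratic local model  */
        int lv, liv, *iv;
        double *v;

        if (nf > n) nf = n;
        lv  = 50 + (3 * d + 3) * nvmax + n + (tau0 + 2) * nf;
        liv = 50 + ((1 << d) + 4) * nvmax + 2 * n;
        iv = Calloc(liv, int);
        v = Calloc(lv, double);

        F77_SUB(lowesd)(&version, iv, &liv, &lv, v, &d, &n, &span,
                        &tdeg, &nvmax, &setLf);        /* setup workspace */
        F77_SUB(lowesb)(x, y, w, &zero, &zero,
                        iv, &liv, &lv, v);             /* build k-d tree  */
        F77_SUB(lowese)(iv, &liv, &lv, v, &n, x, s);   /* evaluate smooth */
        Free(v);
        Free(iv);
    }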

lowesd(106,iv,liv,lv,v,d,n,f,tdeg,nvmax,setLf)   setup workspace
lowesf(x,y,w,iv,liv,lv,v,m,z,L,hat,s)            slow smooth at z
lowesb(x,y,w,diagL,infl,iv,liv,lv,v)             build k-d tree
lowesr(y,iv,liv,lv,v)                            rebuild with new data values
                                                 (does not change y)
lowese(iv,liv,lv,v,m,z, s)                       evaluate smooth at z
lowesl(iv,liv,lv,v,m,z, L)                       explicit hat matrix,
                                                 which maps from y to z
lofort(iunit,iv,liv,lv,v)                        save k-d tree as Fortran
losave(iunit,iv,liv,lv,v)                        save k-d tree in file
lohead(iunit,d,vc,nc,nv)                         read d,vc,nc,nv from file
  liv = 50+(vc+3)*nc                             determine space
  lv = 50+(2*d+1)*nv+nc                          requirements
loread(iunit,d,vc,nc,nv,iv,liv,lv,v)             finish reading k-d tree,
                                                 ready for lowese
lowesa(trL,n,d,tau,nsing, del1,del2)             approximate delta
lowesc(n,L,LL, trL,del1,del2)                    exact delta
lowesp(n,y,yhat,w,rw, pi,ytilde)                 pseudo-values
lowesw(res,n, rw,pi)                             robustness weights

=== arguments ===
d          number of independent variables [integer] (called "p" elsewhere)
del1,del2  delta1, delta2
diagL      diagonal of hat matrix, only set if infl=.true. (n)
f          fraction of points to use in local smooth (called "alpha" elsewhere)
fc         don't refine cells with less than fc*n points; ordinarily=.05
hat        is hat matrix desired? [integer]
           0 = none
           1 = diagonal only
           2 = full matrix
infl       is diagonal of hat matrix desired? [logical]
iunit      Fortran unit number for i/o
iv         workspace (liv)
L          hat matrix (m,n) [real]
           in lowesf, only computed if hat nonzero; if hat=1 only size (n)
LL         workspace (n,n)
liv        50+(2^d+4)*nvmax+2*n
           if setLf, add nf*nvmax
lv         50+(3*d+3)*nvmax+n+(tau0+2)*nf
           if setLf, add (d+1)*nf*nvmax
m          number of points to smooth at; ordinarily=n
n          number of observations
nf         min(n,floor(n*f))
nsing      if 0, print warning in lowesa when trL<tau; typically nsing=iv(30)
nvmax      limit on number of vertices for kd-tree; e.g. max(200,n)
pi         workspace (n) [integer]
res        residual yhat-y (n)
rw         robustness weights (n)
s          smoothed values at z (m)
setLf      in lowesb, save matrix factorizations [logical]
           (needed for lowesr and lowesl)
tau        dimension of local model = iv(DIM);
           =d+1 for linear, (d+2)(d+1)/2 for quadratic
           reduced if dropping squares
tau0       =d+1 for linear, (d+2)(d+1)/2 for quadratic
tdeg       polynomials to fit; 0=constants, 1=linear, 2=quadratics
trL        trace L = sum diagL
v          workspace (lv)
w          weights (n)   local regression: min sum wi * (f(xi)-yi)^2
x          sample locations (n,d)
y          observations (n)
yhat       smoothed y (n)
ytilde     pseudo y (n)
z          locations where smooth is desired (m,d)

If using the double precision version, [real] above should be understood
as Fortran "double precision".

The first argument to lowesd is a version number, updated when calling
sequences change.

If you peek inside the fortran, you will quickly notice that it
was machine generated; the typeset original (in the language "pine")
is much easier to read.

=== iv indices ===
 1 INFO    return code (not currently used)
 2 D       number of independent variables
 3 N       number of observations
 4 VC      2^d (number of vertices of a cell)
 5 NC      number of k-d cells
 6 NV      number of k-d vertices
 7 A1      starting index in iv of a
 8 C1      starting index in iv of c
 9 HI1     starting index in iv of hi
10 LO1     starting index in iv of lo
11 V1      starting index in v of vertices
12 XI1     starting index in v of cut values
13 VV1     starting index in v of vertex values
14 NVMAX   maximum allowed value of nv
15 WORK1   starting index in v of workspace
16 WORK2   starting index in v of workspace
17 NCMAX   maximum allowed value of nc
18 WORK3   starting index in v of workspace
19 NF      floor(n*f) (number of points used as neighborhood)
20 KERNEL  1=tricube, 2=unif
21 KIND    1=k-d,cubic blend, (not implemented:2=quadtree,3=triangulation)
22 PI1     starting index in iv of tree permutation
23 VH      starting index in iv of vhit
24 VV2     starting index in v of work vval used in trL computation
25 LQ      starting index in iv of Lq
26 WORK4   starting index in v of workspace
27 PSI1    starting index in iv of workspace permutation
28 SEQ     sequence number, to check if routines called out of order
           takes on values:
           171 after lowesd
           172 after lowesf
           173 after lowesb
29 DIM     dimension of local regression
           1 constant
           d+1 linear (default)
           (d+2)(d+1)/2 quadratic
           Modified by ehg127 if cdeg<tdeg.
30 SING    number of times singular tolerance was met in l2fit, l2tr
31 PRINT   verbose output?
32 DEG     total degree (of polynomial for local model)
33 NDIST   dd = variables 1:dd enter into distance calculation
34 LF      starting index in v of Lf
35..40     reserved for future use
41..49 CDEG  componentwise degree
iv(A1)  a     coordinate of cut; 0 for leaf (nc)
iv(C1)  c     pointers to corners (index into vertex array v) (vc,nc)
iv(HI1) hi    right subcell (nc)
iv(LO1) lo    left subcell (nc)
        Leaf cell j encloses points x(pi(i),), lo(j)<=i<=hi(j).
        Also, iv(C1),...,iv(PI1-1) is used as workspace (t) by l2fit
------------------------eval only needs workspace up to here
iv(PI1)  pi   permutation of 1:n for listing points in cells
iv(VH)   vhit cell whose subdivision creates vertex (nv)
              0 if vertex is corner of original bounding box.
iv(LQ)   Lq   active point indices for block of Lf (nvmax*nf)
iv(PSI1) psi  workspace permutation of 1:n for sorting distances

=== v indices ===
 1 F       fraction of n to be used as neighborhood. See also iv(19).
 2 FCELL   no refinement if #points <= fcell * n
           default .05
 3 FDIAM   no refinement if diameter is fdiam * overall bounding box
           default 0; Warning: reset to 0 by ehg142 when nsteps>0.
 4 RCOND   reciprocal condition number
 ... 49    reserved for future use
iv(V1)   v     vertices (nv,d)
iv(VV1)  vval  vertex values (0:d,nv)
iv(XI1)  xi    cut values (nc)
------------------------eval only needs workspace up to here
iv(WORK1)      workspace (n) l2fit:dist
iv(WORK2)      workspace (nf) l2fit:eta
iv(WORK3)      workspace (dim,nf) l2fit:X
iv(VV2)  vval2 workspace ((d+1)*nv) pseudo-vval for trL computation
iv(LF)   Lf    hat matrix (data to vertex) ((d+1)*nvmax*nf)
iv(WORK4)      workspace (nf) l2fit:w

Internal routine names have been hidden as follows:
ehg106  select q-th smallest by partial sorting
ehg124  rbuild
ehg125  cpvert
ehg126  bbox
ehg127  l2fit,l2tr computational kernel
ehg128  eval
ehg129  spread
ehg131  lowesb after workspace expansion
ehg133  lowese after workspace expansion
ehg134  abort by calling S Recover function
ehg136  l2fit with hat matrix L
ehg137  vleaf
ehg138  descend
ehg139  l2tr
ehg140(w,i,j)  w(i)=j used when w is declared real, but should store an int
ehg141  delta1,2 from trL
ehg142  robust iteration
ehg144  now called lowesc
ehg152  like ehg142, but for lowesf
ehg167  kernel for losave
ehg168  kernel for loread
ehg169  compute derived k-d tree information
ehg170  generate Fortran
ehg176,ehg177,ehg178,ehg179,ehg180,ehg181  loeval for delta
ehg182  ehgdie(i)
ehg183  warning(message,i,n,inc)
ehg184  warning(message,x,n,inc)
ehg190  now called lowesa, with slight change in calling sequence
ehg191  lowesl after workspace expansion
ehg192  lowesr after workspace expansion
ehg196(tau,d,f,trl)  trL approximation
ehg197  for deg=1,2
m9rwt   now called lowesw
pseudo  now called lowesp
172
pmsco/loess/madeup.c
Normal file
@@ -0,0 +1,172 @@
#include <stdio.h>
#include "loess.h"

struct loess_struct madeup, madeup_new, madeup2;
struct pred_struct madeup_pred;
struct ci_struct madeup_ci;
struct anova_struct madeup_anova;
double one_two[] = {-0.957581198938384, -2.80954937859791, -0.696510845605909,
    3.45100038854536, 0.509259838818042, 0.557854035598286,
    0.0525817201002309, -2.05064423770094, -1.11567547099143,
    -1.18366549451658, 0.511958575232253, 0.334364244817592,
    -2.05706205756846, -0.121896645718402, 0.54423804521097,
    0.600501641888935, 0.531074442421607, 0.495400347786053,
    -1.60860176347294, 0.277370954937718, 0.290464363258084,
    0.579894254111128, -0.290441177117614, 1.30622601704777,
    -0.482897816720494, -0.716423394441349, 0.742412540254878,
    -0.91161346344296, 1.27943556865527, -0.189153217811851,
    0.592292730243945, 0.952415888511291, 0.491436176457309,
    -0.30568088056691, -0.363871357644093, -0.285424162901343,
    -0.0372094292657342, -0.923529247741133, 1.13805430719146,
    -1.33122338081553, 0.55123448290722, -0.852726057929887,
    1.19687530878469, 0.498781686408254, 0.320179856418398,
    0.21244678210441, 1.00935803951191, -0.900989007058962,
    1.13216444413294, 0.0188670824356362, 0.424169515300288,
    -0.19862121711326, 0.955170163539181, 0.948320512371124,
    0.473848149342783, -0.699121305560135, -0.612853026250685,
    0.580431200426044, 1.27799640925722, 0.806797458367235,
    -1.03855925707748, 1.00866312622584, -0.578256568822387,
    -0.323244575961333, -0.756301997657156, 1.38635211208482,
    0.722419488760045, -1.2160777034384, -0.498279906600592,
    0.726247405185, -0.260119271608589, -0.741134528045221,
    -0.184110574491516, 0.307761674659839, 0.464568227698959,
    -0.25253136951752, -0.486503680414154, 0.426634057655542,
    -1.30396915580526, 0.0671486396913438, 1.77117635735777,
    0.907249468179712, 0.432349548721498, 1.41989705188111,
    -0.413389471016361, 2.44202481656431, 0.0411377482323225,
    0.509505377681864, -0.282743502058313, 0.179881630718384,
    -1.18808328118875, 0.98265314676344, -1.04288590077335,
    1.18136543233696, -0.398339818481707, -1.33556478800344,
    -0.502789555455575, 0.484761653956289, -0.806445812279308,
    1.41207651978306, -0.878873945799123, -0.935197083131863,
    -0.33925477332393439, 0.16449721487453731, 1.3700178698345999,
    -1.4946841727166, 1.3805047732704381,
    0.88508389905048512, 0.83560940141892148, 0.89623509727336315,
    -1.289541425794579, 0.2332028995229195, 1.183197522953588,
    -0.85793361589157902, -1.33423445483833, -0.9233512315474407,
    0.76914556896670361, -0.37794794349382183, 0.059114341211622581,
    -1.8706153553475069, -0.67786838062170507, 0.038184754648735768,
    0.37530087746353391, 0.96471695952212921, 0.69505105492152874,
    -0.34214020737803602, -1.1454631827640021, -0.99324551114161375,
    -0.13057284978088679, 1.213711380869505, 0.29124075688915307,
    1.106890512068581, 0.94957063346615733, 0.46367541051066768,
    0.45572327290248621, 0.39878553409592049, -0.015849431703916221,
    -1.3973725035064171, 0.7700624622976332, 0.083291190129894818,
    0.53179773252409901, 0.049727349788233177, -0.73414037626738005,
    -0.96348659055127073, 0.57356064323574374, -0.28194211032947131,
    -0.59450289683584279, 0.77026173196827941, 1.0739830028467161,
    -0.61570603602075391, -0.084794357704615464, -0.49163022652120109,
    -1.526968705617602, -0.19688130817103111, 0.1656534453607213,
    0.19835657518696179, 0.97492977599052544, -0.95484796495550817,
    0.58847390467129868, -0.42688317000127768, 0.1771186872105201,
    -0.91644209647809238, -1.8851386926119349, 0.086893856222760746,
    0.45630642515021741, 0.17428542070878469, -0.0013077214871275221,
    -0.00058541929918550742, 0.28402285608099398, -0.36567881757010029,
    -0.54886653165173238, 0.8578476816688223, 0.69909448655308448,
    -0.14002628501260239, 1.332454137144605, 1.6017946938719501,
    0.01241549637061686, 0.24342918633361621, 1.0773689561938919,
    1.8592463357601141, 0.18590984985424869, 0.033342258305766252,
    0.6130082357970067, 1.068594886375418, -0.68330464261374424,
    -0.12882583544682871, -1.6555248021907429, 0.013086014377651681,
    0.062454455755349927, 0.77304176654886514, 0.12704646649909671,
    0.40865153244567209, 1.195437623807228, -0.18555786800092541,
    -1.299714084101439, 0.89967540292281434, -0.033647925669371137,
    -1.5446015243088369, 0.65520298400478949, -0.71393501757996425};
double response[] = {14.4535533874191, 6.62282520910778, 13.6714139876233,
    14.1975175236874, 12.8605301149348, 12.5228556826206, 14.2146384581959,
    7.9242642010286, 12.5069380013745, 13.7342047122325, 14.7108554131065,
    13.5962229304995, 5.89001909002711, 13.5586535685782, 14.0431671811957,
    13.9313910018427, 13.2189198447833, 17.0905598230825, 15.1993220372035,
    13.2616669404325, 15.7606359467964, 12.0838552528602, 14.344906985408,
    12.6094936116173, 11.9329594317628, 13.4086741328164, 13.7007653532941,
    13.0133656112894, 15.794998892751, 14.600198458049, 16.2757508936254,
    11.5643493993645, 14.8090225170414, 12.9823612913134, 15.003502495484,
    14.7373366435951, 15.7476765061616, 11.6745084114309, 14.047278212178,
    14.6669170934119, 13.8062403198314, 13.6111487435938, 13.3471486192318,
    14.2251519152709, 14.7188461068404, 14.2172164843947, 14.4180584862351,
    14.7196335400403, 12.799715984732, 13.9330377247579, 15.2646032349699,
    14.6603872891079, 9.73869078623634, 14.4434243169553, 14.4172837909381,
    15.1845379738711, 13.3449384473427, 15.3729427547467, 13.8115544407009,
    15.103777322749, 15.3838341258708, 14.368611819712, 12.525202176137,
    14.3250330647389, 15.2596577477861, 13.0045474727206, 14.515987797507,
    15.176981889542, 14.9241874861469, 13.872430121229, 15.3953655496863,
    13.4280761187509, 15.2034304536162, 14.1866308929129, 13.3058326261246,
    14.0746238485616, 14.1030921763152, 13.49966901054, 11.5846746059002,
    14.2648911116312, 14.88561614061, 13.9672969505607, 16.604679813678,
    10.3676055239145, 14.7434725924834, 16.3088265042892, 14.1086733681544,
    13.5909878288487, 14.6745463058857, 15.2940472804827, 14.6867226502357,
    13.6114224063955, 11.9702698734486, 13.8841573398, 15.0717757159234,
    12.5898155750775, 13.8187450898422, 14.2453171289186, 14.4065299197652,
    14.3479407847109};
double newdata1[] = {-2.5, 0., 2.5, 0., 0., 0.};
double newdata2[] = {-0.5, 0.5, 0., 0.};
double coverage = .99;
int n = 100, p = 2, m = 3, se_fit = FALSE;
int i;

main() {
    printf("\nloess(&madeup):\n");
    loess_setup(one_two, response, n, p, &madeup);
    madeup.model.span = 0.5;
    loess(&madeup);
    loess_summary(&madeup);

    printf("\nloess(&madeup_new):\n");
    loess_setup(one_two, response, n, p, &madeup_new);
    madeup_new.model.span = 0.8;
    madeup_new.model.drop_square[0] = TRUE;
    madeup_new.model.parametric[0] = TRUE;
    loess(&madeup_new);
    loess_summary(&madeup_new);

    printf("\nloess(&madeup_new) (family = symmetric):\n");
    madeup_new.model.family = "symmetric";
    loess(&madeup_new);
    loess_summary(&madeup_new);

    printf("\nloess(&madeup_new) (normalize = FALSE):\n");
    madeup_new.model.normalize = FALSE;
    loess(&madeup_new);
    loess_summary(&madeup_new);

    printf("\npredict(newdata1, m, &madeup, &madeup_pred, %d):\n", se_fit);
    predict(newdata1, m, &madeup, &madeup_pred, se_fit);
    printf("%g %g %g\n", madeup_pred.fit[0],
        madeup_pred.fit[1], madeup_pred.fit[2]);

    m = 2;
    se_fit = TRUE;
    printf("\npredict(newdata2, m, &madeup, &madeup_pred, %d):\n", se_fit);
    predict(newdata2, m, &madeup, &madeup_pred, se_fit);
    printf("%g %g\n", madeup_pred.fit[0], madeup_pred.fit[1]);
    printf("%g %g\n", madeup_pred.se_fit[0], madeup_pred.se_fit[1]);
    printf("%g\n", madeup_pred.residual_scale);
    printf("%g\n", madeup_pred.df);

    printf("\npointwise(&madeup_pred, m, coverage, &madeup_ci):\n");
    pointwise(&madeup_pred, m, coverage, &madeup_ci);
    for(i = 0; i < m; i++)
        printf("%g ", madeup_ci.upper[i]);
    printf("\n");
    for(i = 0; i < m; i++)
        printf("%g ", madeup_ci.fit[i]);
    printf("\n");
    for(i = 0; i < m; i++)
        printf("%g ", madeup_ci.lower[i]);
    printf("\n");

    loess_setup(one_two, response, n, p, &madeup2);
    madeup2.model.span = 0.75;
    loess(&madeup2);

    printf("\nanova(&madeup2, &madeup, &madeup_anova):\n");
    anova(&madeup2, &madeup, &madeup_anova);
    printf("%g %g %g %g\n", madeup_anova.dfn, madeup_anova.dfd,
        madeup_anova.F_value, madeup_anova.Pr_F);

    loess_free_mem(&madeup);
    loess_free_mem(&madeup2);
    loess_free_mem(&madeup_new);
    pred_free_mem(&madeup_pred);
    pw_free_mem(&madeup_ci);
}
77
pmsco/loess/makefile
Normal file
@@ -0,0 +1,77 @@
SHELL=/bin/sh

# makefile for the LOESS module
#
# required libraries: libblas, liblapack, libf2c
# (you may have to set soft links so that linker finds them)
#
# see the top-level makefile for additional information.

.SUFFIXES:
.SUFFIXES: .c .cpp .cxx .exe .f .h .i .o .py .pyf .so .x
.PHONY: all loess test gas madeup ethanol air galaxy

HOST=$(shell hostname)
CFLAGS=-O
FFLAGS=-O
OBJ=loessc.o loess.o predict.o misc.o loessf.o dqrsl.o dsvdc.o fix_main.o
LIB=-lblas -lm -lf2c
LIBPATH=
CC=gcc
CCOPTS=
SWIG=swig
SWIGOPTS=
PYTHON=python
PYTHONOPTS=
ifneq (,$(filter merlin%,$(HOST)))
PYTHONINC=-I/usr/include/python2.7 -I/opt/python/python-2.7.5/include/python2.7/
else ifneq (,$(filter ra%,$(HOST)))
PYTHONINC=-I${PSI_PYTHON27_INCLUDE_DIR}/python2.7 -I${PSI_PYTHON27_LIBRARY_DIR}/python2.7/site-packages/numpy/core/include
else
PYTHONINC=-I/usr/include/python2.7
endif

all: loess

loess: _loess.so

loess_wrap.c: loess.c loess.i
	$(SWIG) $(SWIGOPTS) -python loess.i

loess.py _loess.so: loess_wrap.c
# setuptools doesn't handle the fortran files correctly
#	$(PYTHON) $(PYTHONOPTS) setup.py build_ext --inplace
	$(CC) $(CFLAGS) -fpic -c loessc.c loess.c predict.c misc.c loessf.f dqrsl.f dsvdc.f fix_main.c
	$(CC) $(CFLAGS) -fpic -c loess_wrap.c $(PYTHONINC)
	$(CC) -shared $(OBJ) $(LIB) $(LIBPATH) loess_wrap.o -o _loess.so

examples: gas madeup ethanol air galaxy

gas: gas.x

gas.x: gas.o $(OBJ)
	$(CC) -o gas.x gas.o $(OBJ) $(LIB)

madeup: madeup.x

madeup.x: madeup.o $(OBJ)
	$(CC) -o madeup.x madeup.o $(OBJ) $(LIB)

ethanol: ethanol.x

ethanol.x: ethanol.o $(OBJ)
	$(CC) -o ethanol.x ethanol.o $(OBJ) $(LIB)

air: air.x

air.x: air.o $(OBJ)
	$(CC) -o air.x air.o $(OBJ) $(LIB)

galaxy: galaxy.x

galaxy.x: galaxy.o $(OBJ)
	$(CC) -o galaxy.x galaxy.o $(OBJ) $(LIB)

clean:
	rm -f *.o *.so *.x core *.pyc
	rm -f loess.py loess_wrap.c
349
pmsco/loess/misc.c
Normal file
@@ -0,0 +1,349 @@
#include "S.h"
#include "loess.h"

/* If your compiler is so ancient it doesn't recognize void, say
#define void
*/

void
anova(one, two, out)
struct loess_struct *one, *two;
struct anova_struct *out;
{
    double one_d1, one_d2, one_s, two_d1, two_d2, two_s,
        rssdiff, d1diff, tmp, pf();
    int max_enp;

    one_d1 = one->out.one_delta;
    one_d2 = one->out.two_delta;
    one_s = one->out.s;
    two_d1 = two->out.one_delta;
    two_d2 = two->out.two_delta;
    two_s = two->out.s;

    rssdiff = fabs(one_s * one_s * one_d1 - two_s * two_s * two_d1);
    d1diff = fabs(one_d1 - two_d1);
    out->dfn = d1diff * d1diff / fabs(one_d2 - two_d2);
    max_enp = (one->out.enp > two->out.enp);
    tmp = max_enp ? one_d1 : two_d1;
    out->dfd = tmp * tmp / (max_enp ? one_d2 : two_d2);
    tmp = max_enp ? one_s : two_s;
    out->F_value = (rssdiff / d1diff) / (tmp * tmp);
    out->Pr_F = 1 - pf(out->F_value, out->dfn, out->dfd);
}

void
pointwise(pre, m, coverage, ci)
struct pred_struct *pre;
int m;
double coverage;
struct ci_struct *ci;
{
    double t_dist, limit, fit, qt();
    int i;

    ci->fit = (double *) malloc(m * sizeof(double));
    ci->upper = (double *) malloc(m * sizeof(double));
    ci->lower = (double *) malloc(m * sizeof(double));

    t_dist = qt(1 - (1 - coverage)/2, pre->df);
    for(i = 0; i < m; i++) {
        limit = pre->se_fit[i] * t_dist;
        ci->fit[i] = fit = pre->fit[i];
        ci->upper[i] = fit + limit;
        ci->lower[i] = fit - limit;
    }
}

void
pw_free_mem(ci)
struct ci_struct *ci;
{
    free(ci->fit);
    free(ci->upper);
    free(ci->lower);
}

double
pf(q, df1, df2)
double q, df1, df2;
{
    double ibeta();

    return(ibeta(q*df1/(df2+q*df1), df1/2, df2/2));
}

double
qt(p, df)
double p, df;
{
    double t, invibeta();

    t = invibeta(fabs(2*p-1), 0.5, df/2);
    return((p>0.5?1:-1) * sqrt(t*df/(1-t)));
}

/**********************************************************************/
/*
 * Incomplete beta function.
 * Reference: Abramowitz and Stegun, 26.5.8.
 * Assumptions: 0 <= x <= 1; a,b > 0.
 */
#define DOUBLE_EPS 2.2204460492503131E-16
#define IBETA_LARGE 1.0e30
#define IBETA_SMALL 1.0e-30

double
ibeta(x, a, b)
double x, a, b;
{
    int flipped = 0, i, k, count;
    double I, temp, pn[6], ak, bk, next, prev, factor, val;

    if (x <= 0)
        return(0);
    if (x >= 1)
        return(1);

    /* use ibeta(x,a,b) = 1-ibeta(1-x,b,a) */
    if ((a+b+1)*x > (a+1)) {
        flipped = 1;
        temp = a;
        a = b;
        b = temp;
        x = 1 - x;
    }

    pn[0] = 0.0;
    pn[2] = pn[3] = pn[1] = 1.0;
    count = 1;
    val = x/(1.0-x);
    bk = 1.0;
    next = 1.0;
    do {
        count++;
        k = count/2;
        prev = next;
        if (count%2 == 0)
            ak = -((a+k-1.0)*(b-k)*val)/
                ((a+2.0*k-2.0)*(a+2.0*k-1.0));
        else
            ak = ((a+b+k-1.0)*k*val)/
                ((a+2.0*k)*(a+2.0*k-1.0));
        pn[4] = bk*pn[2] + ak*pn[0];
        pn[5] = bk*pn[3] + ak*pn[1];
        next = pn[4] / pn[5];
        for (i=0; i<=3; i++)
            pn[i] = pn[i+2];
        if (fabs(pn[4]) >= IBETA_LARGE)
            for (i=0; i<=3; i++)
                pn[i] /= IBETA_LARGE;
        if (fabs(pn[4]) <= IBETA_SMALL)
            for (i=0; i<=3; i++)
                pn[i] /= IBETA_SMALL;
    } while (fabs(next-prev) > DOUBLE_EPS*prev);
    factor = a*log(x) + (b-1)*log(1-x);
    factor -= gamma(a+1) + gamma(b) - gamma(a+b);
    I = exp(factor) * next;
    return(flipped ? 1-I : I);
}

/*
 * Rational approximation to inverse Gaussian distribution.
 * Absolute error is bounded by 4.5e-4.
 * Reference: Abramowitz and Stegun, page 933.
 * Assumption: 0 < p < 1.
 */

static double num[] = {
    2.515517,
    0.802853,
    0.010328
};

static double den[] = {
    1.000000,
    1.432788,
    0.189269,
    0.001308
};

double
invigauss_quick(p)
double p;
{
    int lower;
    double t, n, d, q;

    if(p == 0.5)
        return(0);
    lower = p < 0.5;
    p = lower ? p : 1 - p;
    t = sqrt(-2 * log(p));
    n = (num[2]*t + num[1])*t + num[0];
    d = ((den[3]*t + den[2])*t + den[1])*t + den[0];
    q = lower ? n/d - t : t - n/d;
    return(q);
}

/*
 * Inverse incomplete beta function.
 * Assumption: 0 <= p <= 1, a,b > 0.
 */

double
invibeta(p, a, b)
double p, a, b;
{
    int i;
    double ql, qr, qm, qdiff;
    double pl, pr, pm, pdiff;
    double invibeta_quick(), ibeta();

    /* MEANINGFUL(qm);*/
    qm = 0;
    if(p == 0 || p == 1)
        return(p);

    /* initialize [ql,qr] containing the root */
    ql = qr = invibeta_quick(p, a, b);
    pl = pr = ibeta(ql, a, b);
    if(pl == p)
        return(ql);
    if(pl < p)
        while(1) {
            qr += 0.05;
            if(qr >= 1) {
                pr = qr = 1;
                break;
            }
            pr = ibeta(qr, a, b);
            if(pr == p)
                return(pr);
            if(pr > p)
                break;
        }
    else
        while(1) {
            ql -= 0.05;
            if(ql <= 0) {
                pl = ql = 0;
                break;
            }
            pl = ibeta(ql, a, b);
            if(pl == p)
                return(pl);
            if(pl < p)
                break;
        }

    /* a few steps of bisection */
    for(i = 0; i < 5; i++) {
        qm = (ql + qr) / 2;
        pm = ibeta(qm, a, b);
        qdiff = qr - ql;
        pdiff = pm - p;
        if(fabs(qdiff) < DOUBLE_EPS*qm || fabs(pdiff) < DOUBLE_EPS)
            return(qm);
        if(pdiff < 0) {
            ql = qm;
            pl = pm;
        } else {
            qr = qm;
            pr = pm;
        }
    }

    /* a few steps of secant */
    for(i = 0; i < 40; i++) {
        qm = ql + (p-pl)*(qr-ql)/(pr-pl);
        pm = ibeta(qm, a, b);
        qdiff = qr - ql;
        pdiff = pm - p;
        if(fabs(qdiff) < 2*DOUBLE_EPS*qm || fabs(pdiff) < 2*DOUBLE_EPS)
            return(qm);
        if(pdiff < 0) {
            ql = qm;
            pl = pm;
        } else {
            qr = qm;
            pr = pm;
        }
    }

    /* no convergence */
    return(qm);
}

/*
 * Quick approximation to inverse incomplete beta function,
 * by matching first two moments with the Gaussian distribution.
 * Assumption: 0 < p < 1, a,b > 0.
 */

static double
misc_fmin(a, b)
double a, b;
{
    return(a < b ? a : b);
}

static double
misc_fmax(a, b)
double a, b;
{
    return(a > b ? a : b);
}

double
invibeta_quick(p, a, b)
double p, a, b;
{
    double x, m, s, invigauss_quick();

    x = a + b;
    m = a / x;
    s = sqrt((a*b) / (x*x*(x+1)));
    return(misc_fmax(0.0, misc_fmin(1.0, invigauss_quick(p)*s + m)));
}

typedef double doublereal;
typedef int integer;

void
Recover(a, b)
char *a;
int *b;
{
    printf(a,b);
    exit(1);
}

void
Warning(a, b)
char *a;
int *b;
{
    printf(a,b);
}

/* d1mach may be replaced by Fortran code:
   mail netlib@netlib.bell-labs.com
   send d1mach from core.
*/

#include <float.h>

doublereal F77_SUB(d1mach) (i)
integer *i;
{
    switch(*i){
    case 1: return DBL_MIN;
    case 2: return DBL_MAX;
    case 3: return DBL_EPSILON/FLT_RADIX;
    case 4: return DBL_EPSILON;
    case 5: return log10(FLT_RADIX);
    default: Recover("Invalid argument to d1mach()", 0L);
    }
}
3161
pmsco/loess/numpy.i
Normal file
File diff suppressed because it is too large
175
pmsco/loess/predict.c
Normal file
@@ -0,0 +1,175 @@
#include "S.h"
#include "loess.h"

void
predict(eval, m, lo, pre, se)
double *eval;
int m, se;
struct loess_struct *lo;
struct pred_struct *pre;
{
    int size_info[3];
    void pred_();

    pre->fit = (double *) malloc(m * sizeof(double));
    pre->se_fit = (double *) malloc(m * sizeof(double));
    pre->residual_scale = lo->out.s;
    pre->df = (lo->out.one_delta * lo->out.one_delta) / lo->out.two_delta;

    size_info[0] = lo->in.p;
    size_info[1] = lo->in.n;
    size_info[2] = m;

    pred_(lo->in.y, lo->in.x, eval, size_info, &lo->out.s,
        lo->in.weights,
        lo->out.robust,
        &lo->model.span,
        &lo->model.degree,
        &lo->model.normalize,
        lo->model.parametric,
        lo->model.drop_square,
        &lo->control.surface,
        &lo->control.cell,
        &lo->model.family,
        lo->kd_tree.parameter,
        lo->kd_tree.a,
        lo->kd_tree.xi,
        lo->kd_tree.vert,
        lo->kd_tree.vval,
        lo->out.divisor,
        &se,
        pre->fit,
        pre->se_fit);
}

void
pred_(y, x_, new_x, size_info, s, weights, robust, span, degree,
    normalize, parametric, drop_square, surface, cell, family,
    parameter, a, xi, vert, vval, divisor, se, fit, se_fit)
double *y, *x_, *new_x, *weights, *robust, *span, *cell, *fit, *s,
    *xi, *vert, *vval, *divisor, *se_fit;
int *size_info, *degree, *normalize, *parametric, *drop_square,
    *parameter, *a, *se;
char **surface, **family;
{
    double *x, *x_tmp, *x_evaluate, *L, new_cell, z, tmp, *fit_tmp,
        *temp, sum, mean;
    int N, D, M, sum_drop_sqr = 0, sum_parametric = 0,
        nonparametric = 0, *order_parametric, *order_drop_sqr;
    int i, j, k, p, cut, comp();

    D = size_info[0];
    N = size_info[1];
    M = size_info[2];

    x = (double *) malloc(N * D * sizeof(double));
    x_tmp = (double *) malloc(N * D * sizeof(double));
    x_evaluate = (double *) malloc(M * D * sizeof(double));
    L = (double *) malloc(N * M * sizeof(double));
    order_parametric = (int *) malloc(D * sizeof(int));
    order_drop_sqr = (int *) malloc(D * sizeof(int));
    temp = (double *) malloc(N * D * sizeof(double));

    for(i = 0; i < (N * D); i++)
        x_tmp[i] = x_[i];
    for(i = 0; i < D; i++) {
        k = i * M;
        for(j = 0; j < M; j++) {
            p = k + j;
            new_x[p] = new_x[p] / divisor[i];
        }
    }
    if(!strcmp(*surface, "direct") || se) {
        for(i = 0; i < D; i++) {
            k = i * N;
            for(j = 0; j < N; j++) {
                p = k + j;
                x_tmp[p] = x_[p] / divisor[i];
            }
        }
    }
    j = D - 1;
    for(i = 0; i < D; i++) {
        sum_drop_sqr = sum_drop_sqr + drop_square[i];
        sum_parametric = sum_parametric + parametric[i];
        if(parametric[i])
            order_parametric[j--] = i;
        else
            order_parametric[nonparametric++] = i;
    }
    for(i = 0; i < D; i++) {
        order_drop_sqr[i] = 2 - drop_square[order_parametric[i]];
        k = i * M;
        p = order_parametric[i] * M;
        for(j = 0; j < M; j++)
            x_evaluate[k + j] = new_x[p + j];
        k = i * N;
        p = order_parametric[i] * N;
        for(j = 0; j < N; j++)
            x[k + j] = x_tmp[p + j];
    }
    for(i = 0; i < N; i++)
        robust[i] = weights[i] * robust[i];

    if(!strcmp(*surface, "direct")) {
        if(*se) {
            loess_dfitse(y, x, x_evaluate, weights, robust,
                !strcmp(*family, "gaussian"), span, degree,
                &nonparametric, order_drop_sqr, &sum_drop_sqr,
                &D, &N, &M, fit, L);
        }
        else {
            loess_dfit(y, x, x_evaluate, robust, span, degree,
                &nonparametric, order_drop_sqr, &sum_drop_sqr,
                &D, &N, &M, fit);
        }
    }
    else {
        loess_ifit(parameter, a, xi, vert, vval, &M, x_evaluate, fit);
        if(*se) {
            new_cell = (*span) * (*cell);
            fit_tmp = (double *) malloc(M * sizeof(double));
            loess_ise(y, x, x_evaluate, weights, span, degree,
                &nonparametric, order_drop_sqr, &sum_drop_sqr,
                &new_cell, &D, &N, &M, fit_tmp, L);
            free(fit_tmp);
        }
    }
    if(*se) {
        for(i = 0; i < N; i++) {
            k = i * M;
            for(j = 0; j < M; j++) {
                p = k + j;
                L[p] = L[p] / weights[i];
                L[p] = L[p] * L[p];
            }
        }
        for(i = 0; i < M; i++) {
            tmp = 0;
            for(j = 0; j < N; j++)
                tmp = tmp + L[i + j * M];
            se_fit[i] = (*s) * sqrt(tmp);
        }
    }
    free(x);
    free(x_tmp);
    free(x_evaluate);
    free(L);
    free(order_parametric);
    free(order_drop_sqr);
    free(temp);
}

void
pred_free_mem(pre)
struct pred_struct *pre;
{
    free(pre->fit);
    free(pre->se_fit);
}
77
pmsco/loess/predict.m
Normal file
@@ -0,0 +1,77 @@
NAME
    predict, pointwise, pred_free_mem, pw_free_mem

SYNOPSIS
    #include "loess.h"

    double *eval, coverage;
    int m, se;
    struct loess_struct *lo;
    struct pred_struct *pre;
    struct ci_struct *ci;

    void predict(eval, m, lo, pre, se)

    void pointwise(pre, m, coverage, ci)

    void pred_free_mem(pre)

    void pw_free_mem(ci)

PARAMETERS

    eval        a vector of length m * p specifying the values of the
                predictors at which the evaluation is to be carried out.
                The j-th coordinate of the i-th point is in eval[i+m*j],
                where 0<=j<p, 0<=i<m.

    m           number of evaluations.

    lo          k-d tree and coefficients.

    pre         predicted values; output by predict(), input to pointwise().

    se          logical flag for computing standard errors at eval.

    ci          pointwise confidence limits.

    coverage    (input) confidence level of the limits as a fraction.

DESCRIPTION

    predict() takes all the loess structures from earlier calls to
    loess_setup() and loess(), does an evaluation based on
    eval and m, and stores the results in the pre structure.
    If se is TRUE, then pre.se_fit is computed along with the
    surface values, pre.fit. The returned vectors have the same
    structural arrangement as eval.

    pointwise() computes the pointwise confidence limits from the
    result of predict().

    pred_free_mem() and pw_free_mem() free the memory allocated
    for the pre and ci structures, respectively.

    loess_struct, pred_struct, and ci_struct are defined in loess.h
    and documented in struct.m.

NOTES

    The computations of predict() that produce the component se_fit
    are much more costly than those that produce the fit,
    so the number of points at which standard errors are
    computed should be modest compared to those at which we do
    evaluations. Often this means calling predict() twice:
    once at a large number of points, with se = FALSE,
    to get a thorough description of the surface; and once
    at a small number of points, with se = TRUE,
    to get standard-error information (see EXAMPLES below).

    Suppose the computation method for loess surfaces is
    interpolate, the default for the argument surface. Then the
    evaluation values of a numeric predictor must lie within
    the range of the values of the predictor used in the fit.
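EXAMPLES

    A hedged sketch (not part of the original distribution) of the
    two-call pattern recommended under NOTES: one dense evaluation
    without standard errors, one small evaluation with them. The
    data arrays x and y, the sizes, and the grid values are
    illustrative assumptions.

        #include "loess.h"

        extern double x[], y[];    /* assumed: n = 100 observations, p = 1 */

        int main() {
            struct loess_struct lo;
            struct pred_struct dense, sparse;
            struct ci_struct ci;
            double grid[100], few[2] = {0.25, 0.75};
            int i, n = 100;

            for(i = 0; i < 100; i++)       /* dense evaluation grid */
                grid[i] = i / 99.0;

            loess_setup(x, y, n, 1, &lo);
            loess(&lo);
            predict(grid, 100, &lo, &dense, FALSE);  /* fit only: cheap  */
            predict(few, 2, &lo, &sparse, TRUE);     /* fit + se: costly */
            pointwise(&sparse, 2, 0.95, &ci);        /* 95% limits       */

            pred_free_mem(&dense);
            pred_free_mem(&sparse);
            pw_free_mem(&ci);
            loess_free_mem(&lo);
            return 0;
        }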
SEE ALSO

    loess_setup, loess, loess_summary, loess_free_mem
55
pmsco/loess/setup.py
Normal file
@@ -0,0 +1,55 @@
#!/usr/bin/env python

__author__ = 'Matthias Muntwiler'

"""
@package loess.setup
setup.py file for LOESS

the LOESS code included here was developed at Bell Labs by
William S. Cleveland, Eric Grosse, Ming-Jen Shyu,
and is dated 18 August 1992.
the code is available in the public domain
from http://www.netlib.org/a/dloess.
see the README file for details.

the Python wrapper was set up by M. Muntwiler
with the help of the SWIG toolkit
and other incredible goodies available in the Linux world.

@bug this file is currently not used because
distutils does not compile the included Fortran files.

@author Matthias Muntwiler

@copyright (c) 2015 by Paul Scherrer Institut @n
Licensed under the Apache License, Version 2.0 (the "License"); @n
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
"""

from distutils.core import setup, Extension
from distutils import sysconfig

import numpy
try:
    numpy_include = numpy.get_include()
except AttributeError:
    numpy_include = numpy.get_numpy_include()

loess_module = Extension('_loess',
                         sources=['loess.i', 'loess_wrap.c', 'loess.c', 'loessc.c', 'predict.c', 'misc.c', 'loessf.f',
                                  'dqrsl.f', 'dsvdc.f'],
                         include_dirs=[numpy_include],
                         libraries=['blas', 'm', 'f2c'],
                         )

setup(name='loess',
      version='0.1',
      author=__author__,
      author_email='matthias.muntwiler@psi.ch',
      description="""LOESS module in Python""",
      ext_modules=[loess_module],
      py_modules=["loess"], requires=['numpy']
      )
163
pmsco/loess/struct.m
Normal file
@@ -0,0 +1,163 @@
(All default values mentioned here are set by loess_setup().)

struct loess_struct *lo;

in
    n: number of observations.

    p: number of numeric predictors.

    y: vector of response (length n).

    x: vector of predictors, of length (n * p).
        The j-th coordinate of the i-th point is in x[i+n*j],
        where 0<=j<p, 0<=i<n.

    weights: vector of weights to be given to individual
        observations in the sum of squared residuals that
        forms the local fitting criterion.
        By default, an unweighted fit is carried out.
        If supplied, weights should be a non-negative
        numeric vector. If the different observations
        have non-equal variances, weights should be
        inversely proportional to the variances.

model
    span: smoothing parameter. Default is 0.75.

    degree: overall degree of locally-fitted polynomial. 1 is
        locally-linear fitting and 2 is locally-quadratic
        fitting. Default is 2.

    normalize: logical that determines if numeric predictors should
        be normalized. If TRUE (1) - the default - the
        standard normalization is used. If FALSE (0), no
        normalization is carried out.

    parametric: for two or more numeric predictors, this argument
        specifies those variables that should be
        conditionally-parametric. The argument should be a
        logical vector of length p, specified in the order
        of the predictor group ordered in x.
        Default is a vector of 0's of length p.

    drop_square: for cases with degree = 2, and with two or more
        numeric predictors, this argument specifies those
        numeric predictors whose squares should be dropped
        from the set of fitting variables. The method of
        specification is the same as for parametric.
        Default is a vector of 0's of length p.

    family: the assumed distribution of the errors. The values
        are "gaussian" or "symmetric". The first value is
        the default. If the second value is specified,
        a robust fitting procedure is used.

control
    surface: determines whether the fitted surface is computed
        directly at all points ("direct") or whether an
        interpolation method is used ("interpolate").
        The latter, the default, is what most users should
        use unless special circumstances warrant.

    statistics: determines whether the statistical quantities are
        computed exactly ("exact") or approximately
        ("approximate"). The latter is the default. The former
        should only be used for testing the approximation in
        statistical development and is not meant for routine
        usage because computation time can be horrendous.

    cell: if interpolation is used to compute the surface, this
        argument specifies the maximum cell size of the k-d
        tree. Suppose k = floor(n*cell*span) where n is the
        number of observations. Then a cell is further
        divided if the number of observations within it
        is greater than or equal to k.

    trace_hat: when statistics is "approximate", determines the
        computational method used to compute the trace of
        the hat matrix, which is used in the computation of
        the statistical quantities. If "exact", an exact
        computation is done; normally this goes quite fast
        on the fastest machines until n, the number of
        observations, is 1000 or more, but for very slow
        machines things can slow down at n = 300.
        If "wait.to.decide" is selected, then a default
        is chosen in loess(); the default is "exact" for
        n < 500 and "approximate" otherwise. If statistics
        is "exact", an exact computation is always done
        for the trace. Setting trace_hat to "approximate"
        for large datasets substantially reduces the
        computation time.

    iterations: if family is "symmetric", the number of iterations
        of the robust fitting method. The default is 0,
        consistent with the default family "gaussian".

    kd_tree: k-d tree, an output of loess().

out
    fitted_values: fitted values of the local regression model

    fitted_residuals: residuals of the local regression fit

    enp: equivalent number of parameters.

    s: estimate of the scale of the residuals.

    one_delta: a statistical parameter used in the computation of
        standard errors.

    two_delta: a statistical parameter used in the computation of
        standard errors.

    pseudovalues: adjusted values of the response when robust
        estimation is used.

    trace_hat: trace of the operator hat matrix.

    diagonal: diagonal of the operator hat matrix.

    robust: robustness weights for robust fitting.

    divisor: normalization divisor for numeric predictors.


struct pred_struct *pre;

    fit: the evaluated loess surface at eval.

    se_fit: estimates of the standard errors of the surface values.

    residual_scale: estimate of the scale of the residuals.

    df: the degrees of freedom of the t-distribution used to
        compute pointwise confidence intervals for the
        evaluated surface.


struct anova_struct *aov;

    dfn: degrees of freedom of the numerator.

    dfd: degrees of freedom of the denominator.

    F_value: F statistic.

    Pr_F: probability F_value is exceeded if null hypothesis
        is true.


struct ci_struct *ci;

    fit: the evaluated loess surface at eval (see pred_struct).

    upper: upper limits of pointwise confidence intervals.

    lower: lower limits of pointwise confidence intervals.
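A hedged usage sketch (not part of the original distribution) showing
where the in, model, and out fields above enter a typical fit; the data
arrays x and y and the function name fit_example are illustrative
assumptions:

    #include <stdio.h>
    #include "loess.h"

    void fit_example(double *x, double *y, int n)
    {
        struct loess_struct lo;

        loess_setup(x, y, n, 1, &lo);    /* fills lo.in, sets defaults  */
        lo.model.span = 0.5;             /* model: smoothing parameter  */
        lo.model.degree = 1;             /* model: locally-linear fit   */
        lo.model.family = "symmetric";   /* model: robust fitting       */
        loess(&lo);                      /* fills lo.kd_tree and lo.out */
        printf("enp = %g, s = %g\n", lo.out.enp, lo.out.s);
        loess_free_mem(&lo);
    }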
206
pmsco/loess/supp.f
Normal file
@@ -0,0 +1,206 @@
      subroutine ehg182(i)
      integer i
      if(i.eq.100) print *,'wrong version number in lowesd. Probably ty
     +po in caller.'
      if(i.eq.101) print *,'d>dMAX in ehg131. Need to recompile with in
     +creased dimensions.'
      if(i.eq.102) print *,'liv too small. (Discovered by lowesd)'
      if(i.eq.103) print *,'lv too small. (Discovered by lowesd)'
      if(i.eq.104) print *,'alpha too small. fewer data values than deg
     +rees of freedom.'
      if(i.eq.105) print *,'k>d2MAX in ehg136. Need to recompile with i
     +ncreased dimensions.'
      if(i.eq.106) print *,'lwork too small'
      if(i.eq.107) print *,'invalid value for kernel'
      if(i.eq.108) print *,'invalid value for ideg'
      if(i.eq.109) print *,'lowstt only applies when kernel=1.'
      if(i.eq.110) print *,'not enough extra workspace for robustness ca
     +lculation'
      if(i.eq.120) print *,'zero-width neighborhood. make alpha bigger'
      if(i.eq.121) print *,'all data on boundary of neighborhood. make a
     +lpha bigger'
      if(i.eq.122) print *,'extrapolation not allowed with blending'
      if(i.eq.123) print *,'ihat=1 (diag L) in l2fit only makes sense if
     + z=x (eval=data).'
      if(i.eq.171) print *,'lowesd must be called first.'
      if(i.eq.172) print *,'lowesf must not come between lowesb and lowe
     +se, lowesr, or lowesl.'
      if(i.eq.173) print *,'lowesb must come before lowese, lowesr, or l
     +owesl.'
      if(i.eq.174) print *,'lowesb need not be called twice.'
      if(i.eq.180) print *,'nv>nvmax in cpvert.'
      if(i.eq.181) print *,'nt>20 in eval.'
      if(i.eq.182) print *,'svddc failed in l2fit.'
      if(i.eq.183) print *,'didnt find edge in vleaf.'
      if(i.eq.184) print *,'zero-width cell found in vleaf.'
      if(i.eq.185) print *,'trouble descending to leaf in vleaf.'
      if(i.eq.186) print *,'insufficient workspace for lowesf.'
      if(i.eq.187) print *,'insufficient stack space'
      if(i.eq.188) print *,'lv too small for computing explicit L'
      if(i.eq.191) print *,'computed trace L was negative; something is
     +wrong!'
      if(i.eq.192) print *,'computed delta was negative; something is wr
     +ong!'
      if(i.eq.193) print *,'workspace in loread appears to be corrupted'
      if(i.eq.194) print *,'trouble in l2fit/l2tr'
      if(i.eq.195) print *,'only constant, linear, or quadratic local mo
     +dels allowed'
      if(i.eq.196) print *,'degree must be at least 1 for vertex influen
     +ce matrix'
      if(i.eq.999) print *,'not yet implemented'
      print *,'Assert failed, error code ',i
      stop
      end
      subroutine ehg183(s,i,n,inc)
      character*(*) s
      integer n, inc, i(inc,n), j
      print *,s,(i(1,j),j=1,n)
      end
      subroutine ehg184(s,x,n,inc)
      character*(*) s
      integer n, inc, j
      double precision x(inc,n)
      print *,s,(x(1,j),j=1,n)
      end
      subroutine losave(iunit,iv,liv,lv,v)
      integer execnt,iunit,liv,lv
      integer iv(liv)
      DOUBLE PRECISION v(lv)
      external ehg167
      save execnt
      data execnt /0/
      execnt=execnt+1
      call ehg167(iunit,iv(2),iv(4),iv(5),iv(6),iv(14),v(iv(11)),iv(iv(7
     +)),v(iv(12)),v(iv(13)))
      return
      end
      subroutine ehg167(iunit,d,vc,nc,nv,nvmax,v,a,xi,vval)
      integer iunit,d,vc,nc,nv,a(nc),magic,i,j
      DOUBLE PRECISION v(nvmax,d),xi(nc),vval(0:d,nv)
      write(iunit,*)d,nc,nv
      do 10 i=1,d
   10 write(iunit,*)v(1,i),v(vc,i)
      j = 0
      do 20 i=1,nc
      if(a(i).ne.0)then
      write(iunit,*)a(i),xi(i)
      else
      write(iunit,*)a(i),j
      end if
   20 continue
      do 30 i=1,nv
   30 write(iunit,*)(vval(j,i),j=0,d)
      end
      subroutine lohead(iunit,d,vc,nc,nv)
      integer iunit,d,vc,nc,nv
      read(iunit,*)d,nc,nv
      vc = 2**d
      end
      subroutine loread(iunit,d,vc,nc,nv,iv,liv,lv,v)
      integer bound,d,execnt,iunit,liv,lv,nc,nv,vc
      integer iv(liv)
      DOUBLE PRECISION v(lv)
      external ehg168,ehg169,ehg182
      save execnt
      data execnt /0/
      execnt=execnt+1
      iv(28)=173
      iv(2)=d
      iv(4)=vc
      iv(14)=nv
      iv(17)=nc
      iv(7)=50
      iv(8)=iv(7)+nc
      iv(9)=iv(8)+vc*nc
      iv(10)=iv(9)+nc
      bound=iv(10)+nc
      if(.not.(bound-1.le.liv))then
      call ehg182(102)
      end if
      iv(11)=50
      iv(13)=iv(11)+nv*d
      iv(12)=iv(13)+(d+1)*nv
      bound=iv(12)+nc
      if(.not.(bound-1.le.lv))then
      call ehg182(103)
      end if
      call ehg168(iunit,d,vc,nc,nv,nv,v(iv(11)),iv(iv(7)),v(iv(12)),v(iv
     +(13)))
      call ehg169(d,vc,nc,nc,nv,nv,v(iv(11)),iv(iv(7)),v(iv(12)),iv(iv(8
     +)),iv(iv(9)),iv(iv(10)))
      return
      end
      subroutine ehg168(iunit,d,vc,nc,nv,nvmax,v,a,xi,vval)
      integer iunit,d,vc,nc,nv,a(nc),magic,i,j
      DOUBLE PRECISION v(nvmax,d),xi(nc),vval(0:d,nv)
      do 10 i=1,d
   10 read(iunit,*)v(1,i),v(vc,i)
      do 20 i=1,nc
   20 read(iunit,*)a(i),xi(i)
      do 30 i=1,nv
   30 read(iunit,*)(vval(j,i),j=0,d)
      end
      subroutine ehg170(k,d,vc,nv,nvmax,nc,ncmax,a,c,hi,lo,v,vval,xi)
      integer d,execnt,i,j,nc,ncmax,nv,nvmax,vc
      integer a(ncmax),c(vc,ncmax),hi(ncmax),lo(ncmax)
      double precision v(nvmax,d),vval(0:d,nvmax),xi(ncmax)
      save execnt
      data execnt /0/
      execnt=execnt+1
      write(k,*)'      double precision function loeval(z)'
      write(k,50)d
      write(k,*)'      integer d,vc,nv,nc'
      write(k,51)nc,vc,nc
      write(k,52)nc,nc
      write(k,53)nv,d
      write(k,54)d,nv
      write(k,55)nc
      write(k,56)
      write(k,57)d,vc,nv,nc
   50 format('      double precision z(',i2,')')
   51 format('      integer a(',i5,'), c(',i3,',',i5,')')
   52 format('      integer hi(',i5,'), lo(',i5,')')
   53 format('      double precision v(',i5,',',i2,')')
   54 format('      double precision vval(0:',i2,',',i5,')')
   55 format('      double precision xi(',i5,')')
   56 format('      double precision ehg128')
   57 format('      data d,vc,nv,nc /',i2,',',i3,',',i5,',',i5,'/')
      do 3 i=1,nc
      write(k,58)i,a(i)
   58 format('      data a(',i5,') /',i5,'/')
      if(a(i).ne.0)then
      write(k,59)i,i,i,hi(i),lo(i),xi(i)
   59 format('      data hi(',i5,'),lo(',i5,'),xi(',i5,') /',
     $ i5,',',i5,',',1pe15.6,'/')
      end if
      do 4 j=1,vc
      write(k,60)j,i,c(j,i)
   60 format('      data c(',i3,',',i5,') /',i5,'/')
    4 continue
    3 continue
      do 5 i=1,nv
      write(k,61)i,vval(0,i)
   61 format('      data vval(0,',i5,') /',1pe15.6,'/')
      do 6 j=1,d
      write(k,62)i,j,v(i,j)
   62 format('      data v(',i5,',',i2,') /',1pe15.6,'/')
      write(k,63)j,i,vval(j,i)
   63 format('      data vval(',i2,',',i5,') /',1pe15.6,'/')
    6 continue
    5 continue
      write(k,*)'      loeval=ehg128(z,d,nc,vc,a,xi,lo,hi,c,v,nv,vval)'
      write(k,*)'      end'
      return
      end
      subroutine lofort(iunit,iv,liv,lv,wv)
      integer execnt,iunit
      integer iv(*)
      DOUBLE PRECISION wv(*)
      external ehg170
      save execnt
      data execnt /0/
      execnt=execnt+1
      call ehg170(iunit,iv(2),iv(4),iv(6),iv(14),iv(5),iv(17),iv(iv(7)),
     +iv(iv(8)),iv(iv(9)),iv(iv(10)),wv(iv(11)),wv(iv(13)),wv(iv(12)))
      return
      end
33
pmsco/makefile
Normal file
@@ -0,0 +1,33 @@
SHELL=/bin/sh

# makefile for EDAC, MSC, and MUFPOT programs and modules
#
# see the top-level makefile for additional information.

.PHONY: all clean edac loess msc mufpot

EDAC_DIR = edac
MSC_DIR = msc
MUFPOT_DIR = mufpot
LOESS_DIR = loess

all: edac loess

edac:
	$(MAKE) -C $(EDAC_DIR)

loess:
	$(MAKE) -C $(LOESS_DIR)

msc:
	$(MAKE) -C $(MSC_DIR)

mufpot:
	$(MAKE) -C $(MUFPOT_DIR)

clean:
	$(MAKE) -C $(EDAC_DIR) clean
	$(MAKE) -C $(LOESS_DIR) clean
	$(MAKE) -C $(MSC_DIR) clean
	$(MAKE) -C $(MUFPOT_DIR) clean
	rm -f *.pyc
1
pmsco/msc/.gitignore
vendored
Normal file
@@ -0,0 +1 @@
revision.f
1
pmsco/msc/__init__.py
Normal file
@@ -0,0 +1 @@
__author__ = 'muntwiler_m'
49
pmsco/msc/makefile
Normal file
@@ -0,0 +1,49 @@
SHELL=/bin/sh

# makefile for MSC program and module
#
# the MSC source code is not included in the public distribution.
# please obtain the MSC code from the original author,
# and copy it to this directory before compilation.
#
# see the top-level makefile for additional information.

.SUFFIXES:
.SUFFIXES: .c .cpp .cxx .exe .f .h .i .o .py .pyf .so
.PHONY: all clean edac msc mufpot

FC=gfortran
FCOPTS=
F2PY=f2py
F2PYOPTS=
CC=gcc
CCOPTS=
SWIG=swig
SWIGOPTS=
PYTHON=python
PYTHONOPTS=

all: msc

msc: msc.exe msc.so

msc.exe: msc.f param.f common.f phases.f angles.f revision.f
	$(FC) $(FCOPTS) -o msc.exe msc.f phases.f angles.f

# msc.pyf currently needs a manual edit before compiling.
# this target should execute only if the file doesn't exist yet.
msc.pyf: | msc.f phases.f angles.f
	$(F2PY) -h msc.pyf -m msc msc.f phases.f angles.f only: mscmain anglesarray anglesfile ps
	$(error msc.pyf auto-generated - must be edited manually before build can continue!)

msc.so: msc.f param.f common.f phases.f angles.f revision.f msc.pyf
	$(F2PY) -c $(F2PYOPTS) msc.pyf msc.f phases.f angles.f -m msc

revision.f: msc.f
	echo "      character*50 coderev" > revision.f
	echo "      parameter(coderev=" >> revision.f
	git log --pretty=format:"     ='Code revision %h, %ad')" --date=iso -1 $< >> $@ || echo "     ='Code revision unknown, "`date +"%F %T %z"`"')" >> $@

clean:
	rm -f *.so *.o *.exe
	rm -f revision.f
112
pmsco/msc_calculator.py
Normal file
@@ -0,0 +1,112 @@
"""
@package pmsco.msc_calculator
Kaduwela MSC program interface.

This module is currently not maintained.

@author Matthias Muntwiler

@copyright (c) 2015 by Paul Scherrer Institut @n
Licensed under the Apache License, Version 2.0 (the "License"); @n
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
"""

import calculator
import data as md
import msc.msc as msc
import logging

logger = logging.getLogger(__name__)


class MscCalculator(calculator.Calculator):
    def write_input_file(self, params, filepath):
        with open(filepath, "w") as f:
            f.write(" %s\n" % (params.title))
            f.write(" %s\n" % (params.comment))
            l_init = "spdf".index(params.initial_state[1])
            f.write(" %4u\n" % (l_init))
            f.write(" %4u\n" % (params.spherical_order))
            f.write(" %s\n" % (params.polarization))
            f.write(" %4u\n" % (params.scattering_level))
            f.write(" %7.2f%7.2f\n" % (params.fcut, params.cut))
            f.write(" %12.6f\n" % (params.angular_broadening))
            f.write(" %12.6f\n" % (params.lattice_constant))
            f.write(" %12.6f\n" % (params.z_surface))
            f.write(" %4u\n" % (params.atom_types))
            for iat in range(params.atom_types):
                f.write(" %4u %s\n" % (params.atomic_number[iat], "..."))
                f.write(" %4u %s\n" % (params.atomic_number[iat], params.phase_file[iat]))
                f.write(" %12.6f\n" % (params.msq_displacement[iat]))
            f.write(" %12.6f\n" % (params.planewave_attenuation))
            f.write(" %12.6f\n" % (params.inner_potential))
            f.write(" %12.6f\n" % (params.symmetry_range))
            f.write(" %12.6f\n" % (params.polar_incidence_angle))
            f.write(" %12.6f\n" % (params.azimuthal_incidence_angle))
            f.write(" %s\n" % (params.vibration_model))
            f.write(" %12.6f\n" % (params.substrate_atomic_mass))
            f.write(" %12.6f\n" % (params.experiment_temperature))
            f.write(" %12.6f\n" % (params.debye_temperature))
            f.write(" %12.6f\n" % (params.debye_wavevector))
            f.write(" %12.6f%7.3f\n" % (params.rme_minus_value, params.rme_minus_shift))
            f.write(" %12.6f%7.3f\n" % (params.rme_plus_value, params.rme_plus_shift))
            f.write(" %4u\n" % (1))
            f.write(" %4u %12.6f\n" % (1, 1.0))

    def run(self, params, cluster, scan, output_file):
        """
        run the MSC program with the given parameters and cluster.

        @param params: a project.Params() object with all necessary values except cluster and output files set.

        @param cluster: a cluster.Cluster(format=FMT_MSC) object with all atom positions set.

        @param scan: a project.Scan() object with all relevant parameters set.
        in particular, a scan file is required.

        @param output_file: base name for all intermediate and output files

        @return: result_file, files_cats

        the scan file must be in ETP(IS) format:
        * column 0: kinetic energy in eV
        * column 1: polar angle in degrees
        * column 2: azimuthal angle in degrees
        * further columns are ignored
        """
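        # illustration (not part of the original file): the first rows of a
        # hypothetical scan file in the column order described above; the
        # values are made-up examples.
        #
        #   250.00    0.00    0.00
        #   251.00    0.00    0.00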

        # generate file names
        base_filename = output_file
        clu_filename = base_filename + ".clu"
        out_filename = base_filename + ".list"
        par_filename = base_filename + ".par"
        dat_filename = base_filename + ".plt1"
        etpi_filename = base_filename + ".etpi"

        # fix MSC particularities
        # singularity at theta == polar_incidence_angle
        if params.polar_incidence_angle == 60.0:
            params.polar_incidence_angle += 0.1

        # save parameter files
        cluster.save_to_file(clu_filename)
        self.write_input_file(params, par_filename)

        if logger.isEnabledFor(logging.INFO):
            options = "11"
        else:
            options = "00"
        revision = ""

        # run MSC
        msc.mscmain(par_filename, clu_filename, scan.filename, base_filename, revision, options)

        # load results
        result_etpi = md.load_plt(dat_filename)
        md.save_data(etpi_filename, result_etpi)

        files = {clu_filename: 'input', par_filename: 'input', dat_filename: 'output', base_filename: 'log',
                 out_filename: 'log', etpi_filename: 'energy'}
        return etpi_filename, files
1
pmsco/mufpot/__init__.py
Normal file
@@ -0,0 +1 @@
__author__ = 'muntwiler_m'
46
pmsco/mufpot/makefile
Normal file
@@ -0,0 +1,46 @@
SHELL=/bin/sh

# makefile for MUFPOT program and module
#
# the MUFPOT source code is not included in the public distribution.
# please obtain the MUFPOT code from the original author,
# and copy it to this directory before compilation.
#
# see the top-level makefile for additional information.

.SUFFIXES:
.SUFFIXES: .c .cpp .cxx .exe .f .h .i .o .py .pyf .so
.PHONY: all clean edac msc mufpot

FC=gfortran
FCOPTS=
F2PY=f2py
F2PYOPTS=
CC=gcc
CCOPTS=
SWIG=swig
SWIGOPTS=
PYTHON=python
PYTHONOPTS=

all: mufpot

mufpot: mufpot.exe mufpot.so

mufpot.exe: mufpot.f
	$(FC) $(FCOPTS) -o mufpot.exe mufpot.f

mufpot.pyf: | mufpot.f
	$(F2PY) -h mufpot.pyf -m mufpot mufpot.f only: mufpot

mufpot.so: mufpot.f mufpot.pyf
	$(F2PY) -c $(F2PYOPTS) mufpot.pyf mufpot.f -m mufpot

revision.f: mufpot.f
	echo "      character*50 coderev" > revision.f
	echo "      parameter(coderev=" >> revision.f
	git log --pretty=format:"     ='Code revision %h, %ad')" --date=iso -1 $< >> $@ || echo "     ='Code revision unknown, "`date +"%F %T %z"`"')" >> $@

clean:
	rm -f *.so *.o *.exe
	rm -f revision.f
381
pmsco/pmsco.py
Executable file
@@ -0,0 +1,381 @@
#!/usr/bin/env python

"""
@package pmsco.pmsco
PEARL Multiple-Scattering Calculation and Structural Optimization

this is the main entry point and top-level interface of the PMSCO package.
all calculations (any mode, any project) start by calling the main_pmsco() function of this module.
the module also provides a command line parser.

command line usage: call with the -h option to see the list of arguments.

python usage: call main_pmsco() with suitable arguments.

for parallel execution, prefix the command line with mpiexec -np NN, where NN is the number of processes to use.
note that in parallel mode, one process takes the role of the coordinator (master).
the master does not run calculations and is idle most of the time.
to benefit from parallel execution on a work station, NN should be the number of processors plus one.
on a cluster, the number of processes is chosen according to the available resources.

all calculations can also be run in a single process.
PMSCO serializes the calculations automatically.

the code of the main module is independent of a particular calculation project.
all project-specific code must be in a separate python module.
the project module must implement a class derived from pmsco.project.Project,
and a global function create_project which returns a new instance of the derived project class.
refer to the projects folder for examples.

@pre
* python 2.7, including python-pip
* numpy
* nose from Debian python-nose
* statsmodels from Debian python-statsmodels, or PyPI (https://pypi.python.org/pypi/statsmodels)
* periodictable from PyPI (https://pypi.python.org/pypi/periodictable)
* mpi4py from PyPI (the Debian package may have a bug causing the program to crash)
* OpenMPI, including libopenmpi-dev
* SWIG from Debian swig

to install a PyPI package, e.g. periodictable, do
@code{.sh}
pip install --user periodictable
@endcode

@author Matthias Muntwiler, matthias.muntwiler@psi.ch

@copyright (c) 2015 by Paul Scherrer Institut @n
Licensed under the Apache License, Version 2.0 (the "License"); @n
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
"""

import os.path
import sys
import datetime
import argparse
import logging
import cluster
import dispatch
import handlers
import files
import calculator
import swarm
import grid
# import gradient
from mpi4py import MPI

# the module-level logger
logger = logging.getLogger(__name__)


def setup_logging(enable=False, filename="pmsco.log", level="WARNING"):
    """
    configure the root logger. direct the logs either to a file or the null handler.

    this function must be called before the first logging command,
    whether a log output is requested or not.
    to disable logging, call this function with enable=False (default).

    modules should create their own loggers, by calling
    @code logger = logging.getLogger(__name__) @endcode
    at the top of the module code.
    that logger is then used by calls like
    @code logger.debug(message) @endcode.

    @param enable: (bool) True=enable logging to the specified file,
    False=do not generate a log (null handler).
    @param filename: (string) path and name of the log file.
    if this process is part of an MPI communicator,
    the function inserts a dot and the MPI rank of this process before the extension.
    @param level: (string) name of the log level.
    must be one of "DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL".
    if empty or invalid, the function raises a ValueError.
    @return None
    """
    numeric_level = getattr(logging, level.upper(), None)
    if not isinstance(numeric_level, int):
        raise ValueError('Invalid log level: %s' % level)

    logger = logging.getLogger("")
    logger.setLevel(numeric_level)

    logformat = '%(asctime)s (%(name)s) %(levelname)s: %(message)s'
    formatter = logging.Formatter(logformat)

    if enable:
        mpi_comm = MPI.COMM_WORLD
        mpi_size = mpi_comm.Get_size()
        if mpi_size > 1:
            mpi_rank = mpi_comm.Get_rank()
            root, ext = os.path.splitext(filename)
            filename = root + "." + str(mpi_rank) + ext

        handler = logging.FileHandler(filename, mode="w", delay=True)
        handler.setLevel(numeric_level)
        handler.setFormatter(formatter)
    else:
        handler = logging.NullHandler()

    logger.addHandler(handler)
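
# note (illustration): with the default filename "pmsco.log" and four MPI
# processes, setup_logging() writes pmsco.0.log ... pmsco.3.log, one per rank.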


def set_common_args(project, args):
    """
    set common project arguments from the parsed command line.

    this function translates and distributes the common arguments from the command line parser
    to their respective destinations.
    as of this writing, there are two destinations: the global logger and the project instance.

    note that run_project() is called with the project instance as the only argument.
    all project-related arguments from the command line must therefore be copied to the project object.

    @param args: a namespace object containing the necessary parameters.
    this can be an instance of Args, or the return value of parse_cli(),
    or any object which has the same attributes as the Args class.

    @return: None
    """
    log_file = "pmsco.log"

    if args.data_dir:
        project.data_dir = args.data_dir
    if args.output_file:
        project.set_output(args.output_file)
        log_file = args.output_file + ".log"
    if args.log_file:
        log_file = args.log_file
    setup_logging(enable=args.log_enable, filename=log_file, level=args.log_level)

    logger.debug("creating project")
    mode = args.mode.lower()
    if mode in {'single', 'grid', 'swarm'}:
        project.mode = mode
    else:
        logger.error("invalid optimization mode '%s'.", mode)

    if args.pop_size:
        project.pop_size = args.pop_size

    code = args.code.lower()
    if code in {'edac', 'msc', 'test'}:
        project.code = code
    else:
        logger.error("invalid code argument")

    if args.time_limit:
        project.set_timedelta_limit(datetime.timedelta(hours=args.time_limit))

    if args.keep_files:
        if "all" in args.keep_files:
            cats = set([])
        else:
            cats = files.FILE_CATEGORIES - set(args.keep_files)
        cats -= {'report'}
        if mode == 'single':
            cats -= {'model'}
        project.files.categories_to_delete = cats


def log_project_args(project):
    """
    send some common project arguments to the log.

    @param project: project instance (sub-class of pmsco.project.Project).
    @return: None
    """
    try:
        logger.info("scattering code: {0}".format(project.code))
        logger.info("optimization mode: {0}".format(project.mode))
        logger.info("minimum swarm size: {0}".format(project.pop_size))

        logger.info("data directory: {0}".format(project.data_dir))
        logger.info("output file: {0}".format(project.output_file))

        _files_to_keep = files.FILE_CATEGORIES - project.files.categories_to_delete
        logger.info("intermediate files to keep: {0}".format(", ".join(_files_to_keep)))
    except AttributeError:
        logger.warning("AttributeError in log_project_args")


def run_project(project):
    """
    run a calculation project.

    @param project: project instance (sub-class of pmsco.project.Project).
    @return: None
    """
    log_project_args(project)

    optimizer_class = None
    if project.mode == 'single':
        optimizer_class = handlers.SingleModelHandler
    elif project.mode == 'grid':
        optimizer_class = grid.GridSearchHandler
    elif project.mode == 'swarm':
        optimizer_class = swarm.ParticleSwarmHandler
    elif project.mode == 'gradient':
        logger.error("gradient search not implemented")
        # TODO: implement gradient search
        # optimizer_class = gradient.GradientSearchHandler
    else:
        logger.error("invalid optimization mode '%s'.", project.mode)
    project.handler_classes['model'] = optimizer_class

    project.handler_classes['region'] = handlers.choose_region_handler_class(project)

    calculator_class = None
    if project.code == 'edac':
        logger.debug("importing EDAC interface")
        import edac_calculator
        project.cluster_format = cluster.FMT_EDAC
        calculator_class = edac_calculator.EdacCalculator
    elif project.code == 'msc':
        logger.debug("importing MSC interface")
        import msc_calculator
        project.cluster_format = cluster.FMT_MSC
        calculator_class = msc_calculator.MscCalculator
    elif project.code == 'test':
        logger.debug("importing TEST interface")
        project.cluster_format = cluster.FMT_EDAC
        calculator_class = calculator.TestCalculator
    else:
        logger.error("invalid code argument")
    project.calculator_class = calculator_class

    if project and optimizer_class and calculator_class:
        logger.info("starting calculations")
        try:
            dispatch.run_calculations(project)
        except (SystemExit, KeyboardInterrupt):
            raise
        except Exception as __:
            logger.exception("unhandled exception during calculations.")
            raise
        else:
            logger.info("calculations complete")
    else:
        logger.error("undefined project, optimizer, or calculator.")


class Args(object):
    """
    arguments of the main function.

    this class can be used to set up an arguments object for the main
    function as an alternative to the __main__ function which parses
    command line arguments.

    the constructor initializes the attributes with the same default
    values as the command line parser.
    """

    def __init__(self, mode="single", code="edac", output_file=""):
        """
        constructor.

        the parameters are the same as for the command line interface.
        project and mode are mandatory.
        other parameters may be required depending on the project
        and/or the calculation mode.
        """
        self.mode = mode
        self.pop_size = 0
        self.code = code
        self.data_dir = os.getcwd()
        self.output_file = output_file
        self.time_limit = 24.0
        self.keep_files = []
        self.log_level = "WARNING"
        self.log_file = ""
        self.log_enable = True
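
# usage sketch (illustration): driving PMSCO from python code via the Args
# class instead of the command line; "my_project" stands for a hypothetical
# project module that defines create_project() as described above.
#
#   import my_project
#   args = Args(mode="grid", code="edac", output_file="run01")
#   project = my_project.create_project(args)
#   set_common_args(project, args)
#   run_project(project)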


def get_cli_parser(default_args=None):
    if not default_args:
        default_args = Args()

    KEEP_FILES_CHOICES = files.FILE_CATEGORIES | {'all'}

    parser = argparse.ArgumentParser(
        description="""
multiple-scattering calculations and optimization

you must call pmsco.py from a project file which defines the calculation project.
the project file must be a regular Python module and define:

1) a project class derived from pmsco.project.Project.
   the class implements/overrides all necessary methods of the calculation project,
   in particular create_domain, create_cluster, and create_params.

2) a global function named create_project.
   the function accepts a namespace object from the argument parser.
   it may evaluate extra, project-specific arguments.
   it does not need to evaluate the common parameters described below.
   the function must return an instance of the project class described above.

3) main code that parses the command line and calls pmsco.pmsco.main_pmsco()
   (see the projects folder for examples).
""")
    # the required argument list may depend on the calculation mode.
    # for simplicity, the parser does not check these requirements.
    # all parameters are optional and accepted regardless of mode.
    # errors may occur if implicit requirements are not met.
    parser.add_argument('-m', '--mode', default='single',
                        choices=['single', 'grid', 'swarm', 'gradient'],
                        help='calculation mode')
    parser.add_argument('--pop-size', type=int, default=0,
                        help='population size (number of particles) in swarm optimization mode. ' +
                             'default is the greater of 4 or two times the number of calculation processes.')
    parser.add_argument('-c', '--code', choices=['msc', 'edac', 'test'], default="edac",
                        help='scattering code (default: edac)')
    parser.add_argument('-d', '--data-dir', default=os.getcwd(),
                        help='directory path for experimental data files (if required by project). ' +
                             'default: working directory')
    parser.add_argument('-o', '--output-file',
                        help='base path for intermediate and output files. ' +
                             'default: pmsco_data')
    parser.add_argument('-k', '--keep-files', nargs='*', default=files.FILE_CATEGORIES_TO_KEEP,
                        choices=KEEP_FILES_CHOICES,
                        help='output file categories to keep after the calculation. '
                             'by default, cluster and model (simulated data) '
                             'of a limited number of best models are kept.')
    parser.add_argument('-t', '--time-limit', type=float, default=24.0,
                        help='wall time limit in hours. the optimizers try to finish before the limit. default: 24.')
    parser.add_argument('--log-file', default=default_args.log_file,
                        help='name of the main log file. ' +
                             'under MPI, the rank of the process is inserted before the extension. ' +
                             'defaults: output file + .log, or pmsco.log.')
    parser.add_argument('--log-level', default=default_args.log_level,
                        help='minimum level of log messages. DEBUG, INFO, WARNING, ERROR, CRITICAL. default: WARNING.')
    feature_parser = parser.add_mutually_exclusive_group(required=False)
    feature_parser.add_argument('--log-enable', dest='log_enable', action="store_true",
                                help="enable logging. by default, logging is on.")
    feature_parser.add_argument('--log-disable', dest='log_enable', action='store_false',
                                help="disable logging. by default, logging is on.")
    parser.set_defaults(log_enable=default_args.log_enable)

    return parser


def parse_cli():
    """
    parse the command line interface.

    @return: tuple (args, unknown_args) of the Namespace object created by the
    argument parser and the list of unrecognized arguments.
    """
    default_args = Args()
    parser = get_cli_parser(default_args)

    args, unknown_args = parser.parse_known_args()

    return args, unknown_args


if __name__ == '__main__':
    main_parser = get_cli_parser()
    main_parser.print_help()
    sys.exit(0)
995
pmsco/project.py
Normal file
@@ -0,0 +1,995 @@
"""
@package pmsco.project
project-independent classes which store and handle model parameters.

the most important class defined here is Project.
each calculation project needs to derive its own project class from it.
the Domain and Params classes are typically used unchanged.

@note nomenclature: the term @e parameters has several meanings in the code and documentation.
the following distinctive terms are used in updated documentation sections.
ambiguous terms may still be present in older code sections.
@arg <em>calculation parameters</em> set of specific parameters passed as input to the calculation programs.
    the amount and meaning of these parameters depend on the calculation code used.
    typically, many of these parameters remain fixed, or change very rarely in the course of the study.
@arg <em>model parameters</em> concise set of independent physical parameters
    that define the system in one calculation instance.
    these parameters are varied systematically by the optimization process.
    they are mapped to calculation parameters and a cluster by code derived from the Project class.

@author Matthias Muntwiler, matthias.muntwiler@psi.ch

@copyright (c) 2015 by Paul Scherrer Institut @n
Licensed under the Apache License, Version 2.0 (the "License"); @n
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
"""

from __future__ import division
import copy
import datetime
import logging
import numpy as np
import collections
import data as md
import cluster as mc
import files
import handlers

logger = logging.getLogger(__name__)

ParamDomain = collections.namedtuple('ParamDomain', ['start', 'min', 'max', 'step'])


class Domain(object):
    """
    Domain of model parameters.

    Each member contains a dictionary of model parameter names and their values.
    Parameter names can be defined almost freely by the project,
    except that they should contain only alphanumeric and underscore characters.
    furthermore, names starting with an underscore are reserved for the optimizers.
    """

    ## @var start (dict)
    # dictionary of start values for each model parameter.
    #
    # the start value can be the initial guess for an optimization run,
    # or the actual value for a single calculation.
    #
    # there must be one item for each model parameter,
    # where the key is the name of the parameter, and the value its physical value.

    ## @var min (dict)
    # dictionary of minimum values for each model parameter.
    #
    # the minimum defines the lower bound of the allowed interval for a model parameter.
    #
    # there must be one item for each model parameter,
    # where the key is the name of the parameter, and the value its physical value.

    ## @var max (dict)
    # dictionary of maximum values for each model parameter.
    #
    # the maximum defines the upper bound of the allowed interval for a model parameter.
    #
    # there must be one item for each model parameter,
    # where the key is the name of the parameter, and the value its physical value.

    ## @var step (dict)
    # dictionary of step sizes for each model parameter.
    #
    # depending on the optimization mode, the step is a guess of how fast values should vary,
    # e.g. step size, gradient, velocity, ...
    #
    # there must be one item for each model parameter,
    # where the key is the name of the parameter, and the value its physical value.

    def __init__(self):
        """
        initialize the domain object with empty dictionaries.
        """
        self.start = {}
        self.min = {}
        self.max = {}
        self.step = {}

    def add_param(self, name, start, min, max, step):
        """
        set the domain of one parameter with all necessary values at once.

        the exact meaning of the arguments depends on the calculation mode.

        @param name (string) name of the parameter (alphanumeric and underscore characters only).
        it is recommended to use short but distinctive names.

        @param start (float) start value.

        @param min (float) lower bound of the parameter interval.

        @param max (float) upper bound of the parameter interval.

        @param step (float) step size.
        """
        self.start[name] = start
        self.min[name] = min
        self.max[name] = max
        self.step[name] = step

    def get_param(self, name):
        """
        get all values of a model parameter in a tuple.

        @param name (string) name of the parameter.

        @return named tuple ParamDomain(start, min, max, step) of the parameter.

        @raise KeyError if the parameter is not defined.
        """
        return ParamDomain(self.start[name], self.min[name], self.max[name], self.step[name])
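
# usage sketch (illustration, not part of the original file): a project
# declares its model parameters in create_domain(); the parameter names and
# values below are hypothetical examples.
#
#   dom = Domain()
#   dom.add_param('dz', start=1.50, min=1.20, max=1.80, step=0.05)
#   dom.add_param('phi', start=0.0, min=-10.0, max=10.0, step=1.0)
#   dom.get_param('dz')   # -> ParamDomain(start=1.5, min=1.2, max=1.8, step=0.05)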


class Params(object):
    """
    calculation parameters for a single scattering calculation job.

    this class holds all the calculation parameters that are passed via input file to the calculation program.

    the class can hold parameters for both the MSC and EDAC codes.
    some parameters are used by both codes, others are used just by one of them.
    newer features such as multiple emitters, multiple symmetries, and others are supported in EDAC mode only.
    MSC mode is currently not maintained.

    objects of this class are created by the implementation of the create_params() method
    of the actual project class.
    """
    def __init__(self):
        self.title = "MSC default parameters"
        self.comment = "from msc_project.Params()"
        self.cluster_file = ""
        self.output_file = ""
        self.scan_file = ""
        # EDAC convention: 1s, 2p, 2p1/2, etc.
        self.initial_state = "1s"
        # MSC convention: H, V, L, R, U
        self.polarization = "H"
        self.angular_broadening = 0.0
        self.z_surface = 0.0
        self.inner_potential = 10.0
        # the energy scale of EDAC is referenced to the vacuum level
        # but data files are referenced to the Fermi level.
        # the msc_edac module adds the work function to the kinetic energy before it calls EDAC.
        self.work_function = 0.0
        self.symmetry_range = 360.0
        self.polar_incidence_angle = 60.0
        self.azimuthal_incidence_angle = 0.0
        self.experiment_temperature = 300.0
        self.debye_temperature = 400.0
        self.debye_wavevector = 1.0
        # used by MSC only
        self.spherical_order = 2
        self.scattering_level = 5
        self.fcut = 15.0
        self.cut = 15.0
        self.lattice_constant = 1.0
        self.atom_types = 0
        self.atomic_number = [1, 2, 3, 4]
        self.phase_file = ["1.pha", "2.pha", "3.pha", "4.pha"]
        self.msq_displacement = [0.1, 0.1, 0.1, 0.1]
        self.planewave_attenuation = 1.0
        self.vibration_model = "N"
        self.substrate_atomic_mass = 1.0
        self.rme_minus_value = 0.5
        self.rme_minus_shift = 0.0
        self.rme_plus_value = 0.5
        self.rme_plus_shift = 0.0
        # used by EDAC only
        self.emitters = [(0.0, 0.0, 0.0, 0)]
        self.lmax = 15
        self.dmax = 5.0
        self.orders = [20]
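
# usage sketch (illustration): a minimal create_params() implementation in a
# derived project class; the attribute values are hypothetical examples.
#
#   def create_params(self, model, index):
#       params = Params()
#       params.title = "my system"
#       params.initial_state = self.scans[index.scan].initial_state
#       params.inner_potential = model['V0']
#       return params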


class Scan(object):
    """
    class to describe the scanning scheme or store the experimental data set.
    """

    ## @var filename (string)
    # file name from which a scan was loaded

    ## @var raw_data (numpy.ndarray)
    # original scan data (ETPAIS array)

    ## @var dtype (dict)
    # data type of self.raw_data.
    #
    # one of the data.DTYPE_Xxxx constants.

    ## @var modulation (numpy.ndarray)
    # modulation function calculated from the original scan (ETPAIS array)

    ## @var mode (list of characters)
    # list of ETPAI column names which are scanned in self.raw_data.
    #
    # example: ['t', 'p']

    ## @var emitter (string)
    # chemical symbol of the emitter atom
    #
    # example: 'Cu'

    ## @var initial_state (string)
    # nl term of the initial state
    #
    # in the form expected by EDAC, for example: '1s'

    ## @var energies (numpy.ndarray)
    # kinetic energy referenced to the Fermi level.
    #
    # one-dimensional array.

    ## @var thetas (numpy.ndarray)
    # polar angle referenced to normal emission.
    #
    # one-dimensional array.
    #
    # note: in the case of a hemispherical scan, the values in this array will not be unique.

    ## @var phis (numpy.ndarray)
    # azimuthal angle referenced to an arbitrary origin.
    #
    # one-dimensional array.
    #
    # note: in the case of a hemispherical scan, the values in this array will not be unique, and not monotonic.

    ## @var alphas (numpy.ndarray)
    # analyser angle referenced to normal emission.
    #
    # one-dimensional array.

    def __init__(self):
        self.filename = ""
        self.raw_data = None
        self.dtype = None
        self.modulation = None
        self.mode = []
        self.emitter = ""
        self.initial_state = "1s"
        self.energies = np.zeros((0))
        self.thetas = np.zeros((0))
        self.phis = np.zeros((0))
        self.alphas = np.zeros((0))

    def copy(self):
        """
        create a copy of the scan.

        @return: new independent scan object with the same attributes as the original one.
        """
        return copy.deepcopy(self)

    def set_scan(self, filename, emitter, initial_state):
        """
        set the file name of the reference experiment and load it.

        the extension must be one of msc_data.DATATYPES (case insensitive)
        corresponding to the meaning of the columns in the file.

        this method does not calculate the modulation function.

        @attention EDAC can only calculate equidistant, rectangular scans.
        this version introduces holo scans as an experimental feature.
        for all other scan types, the scan file must exactly conform with a rectangular scan.
        the following scans are currently supported:

        * intensity vs energy at fixed theta, phi
        * intensity vs analyser angle vs energy at normal emission (theta = 0, constant phi)
        * intensity vs theta, phi, or alpha
        * holo scan (theta, phi)

        @param filename: (string) file name of the experimental data, possibly including a path.

        @param emitter: (string) chemical symbol of the photo-emitting atom, e.g. "Cu".

        @param initial_state: (string) nl term of the initial state of the atom, e.g. "2p".
        """
        self.filename = filename
        self.emitter = emitter
        self.initial_state = initial_state

        if self.filename:
            self.raw_data = md.load_data(self.filename)
            self.dtype = self.raw_data.dtype
            self.mode, positions = md.detect_scan_mode(self.raw_data)

            if 'e' in self.mode:
                self.energies = positions['e']
            else:
                try:
                    self.energies = np.asarray((self.raw_data['e'][0], ))
                except ValueError:
                    logger.error("missing energy in scan file %s", self.filename)
                    raise

            if 't' in self.mode:
                self.thetas = positions['t']
            else:
                try:
                    self.thetas = np.asarray((self.raw_data['t'][0], ))
                except ValueError:
                    logger.info("missing theta in scan file %s, defaulting to 0.0", self.filename)
                    self.thetas = np.zeros((1))

            if 'p' in self.mode:
                self.phis = positions['p']
            else:
                try:
                    self.phis = np.asarray((self.raw_data['p'][0], ))
                except ValueError:
                    logger.info("missing phi in scan file %s, defaulting to 0.0", self.filename)
                    self.phis = np.zeros((1))

            if 'a' in self.mode:
                self.alphas = positions['a']
            else:
                try:
                    self.alphas = np.asarray((self.raw_data['a'][0], ))
                except ValueError:
                    logger.info("missing alpha in scan file %s, defaulting to 0.0", self.filename)
                    self.alphas = np.zeros((1))
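
# usage sketch (illustration): loading a hypothetical angle-scan file.
#
#   scan = Scan()
#   scan.set_scan("cu_2p_holo.etpi", emitter="Cu", initial_state="2p")
#   scan.mode       # e.g. ['t', 'p'] for a (theta, phi) holo scan
#   scan.energies   # one-element array if the file holds a single energy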


class ClusterGenerator(object):
    """
    cluster generator class.

    this class bundles the cluster methods in one place
    so that it's easier to exchange them for different kinds of clusters.

    the project must override at least the create_cluster method.
    if emitters should be run in parallel tasks, the count_emitters method must be implemented as well.
    """

    def __init__(self, project):
        """
        initialize the cluster generator.

        @param project: reference to the project object.
        cluster generators may need to look up project parameters.
        """
        self.project = project

    def count_emitters(self, model, index):
        """
        return the number of emitter configurations for a particular model.

        the number of emitter configurations may depend on the model parameters, scan index and symmetry index.
        by default, the method returns 1, which means that there is only one emitter configuration.

        emitter configurations are mainly a way to distribute the calculations to multiple processes
        based on emitters, since the resulting diffraction patterns add up incoherently.
        for this to work, the create_cluster() method must pay attention to the emitter index
        and generate either a full cluster with all emitters (single process)
        or a cluster with only a subset of the emitters according to the emitter index (multiple processes).
        whether all emitters are calculated in one or multiple processes is decided at run-time
        based on the available resources.

        note that this function returns the number of _configurations_, not _atoms_.
        an emitter configuration (declared in a Cluster) may include more than one atom.
        it is up to the project what is included in a particular configuration.

        to enable multiple emitter configurations, the derived project class must override this method
        and return a number greater than 1.

        @note in some cases it may be most efficient to call create_cluster and
        return Cluster.get_emitter_count() of the generated cluster.
        this is possible because the method is called with emitter index -1.
        model and index can be passed unchanged to create_cluster.
        (see the sketch after this class.)

        @param model (dictionary) model parameters to be used in the calculation.

        @param index (named tuple CalcID) calculation index.
        the method should consider only the following attributes:
        @arg @c scan scan index (index into Project.scans)
        @arg @c sym symmetry index (index into Project.symmetries)
        @arg @c emit emitter index is -1 if called by the emitter handler.

        @return number of emitter configurations.
        this implementation returns the default value of 1.
        """
        return 1

    def create_cluster(self, model, index):
        """
        create a Cluster object given the model parameters and calculation index.

        the generated cluster will typically depend on the model parameters.
        depending on the project, it may also depend on the scan index, symmetry index and emitter index.

        the scan index can be used to generate a different cluster for a different scan geometry,
        e.g., if some atoms can be excluded due to a longer mean free path.
        if this is not the case for the specific project, the scan index can be ignored.

        the symmetry index may select a particular domain that has a different atomic arrangement.
        in this case, depending on the value of index.sym, the function must generate a cluster corresponding
        to the particular domain/symmetry.
        the method can ignore the symmetry index if the project defines only one symmetry,
        or if the symmetry does not correspond to a different atomic structure.

        the emitter index selects a particular emitter configuration.
        depending on the value of the emitter index, the method must react differently:

        1. if the value is lower or equal to zero, return the full cluster and mark all inequivalent emitter atoms.
           emitters which are reproduced by a symmetry expansion in combine_emitters() should not be marked.
           the full diffraction scan will be calculated in one calculation.

        2. if the value is greater than zero, generate the cluster with the emitter configuration
           selected by the emitter index.
           the index is in the range between 1 and the return value of count_emitters().
           the results of the individual emitter calculations are summed up in combine_emitters().

        the code should ideally be written such that either case yields the same diffraction result.
        if count_emitters() always returns 1 (default), the second case does not have to be implemented,
        and the method can ignore the emitter index.

        the method must ignore the model and energy index.

        @param model (dictionary) model parameters to be used in the calculation.

        @param index (named tuple CalcID) calculation index.
        the method should consider only the following attributes:
        @arg @c scan scan index (index into Project.scans)
        @arg @c sym symmetry index (index into Project.symmetries)
        @arg @c emit emitter index.
        if lower or equal to zero, generate the full cluster and mark all emitters.
        if greater than zero, the value is a 1-based index of the emitter configuration.
        """
        return None
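
# minimal sketch (illustration, not part of the original file): a cluster
# generator that enables per-emitter parallelization by delegating the count
# to the generated cluster, as suggested in the count_emitters() note above.
# the cluster-building details are left out; only the interface contract is shown.
#
#   class MyClusterGenerator(ClusterGenerator):
#       def count_emitters(self, model, index):
#           # called with index.emit == -1; reuse the full cluster
#           return self.create_cluster(model, index).get_emitter_count()
#
#       def create_cluster(self, model, index):
#           clu = mc.Cluster()
#           # ... build atoms from the model parameters ...
#           # if index.emit <= 0: mark all inequivalent emitters.
#           # if index.emit > 0: mark only the 1-based configuration index.emit.
#           return clu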


class LegacyClusterGenerator(ClusterGenerator):
    """
    cluster generator class for projects that don't declare a generator.

    in previous versions, the create_cluster and count_emitters methods were implemented by the project class.
    this class redirects generator calls to the project methods,
    providing compatibility with older project code.
    """

    def __init__(self, project):
        super(LegacyClusterGenerator, self).__init__(project)

    def count_emitters(self, model, index):
        """
        redirect the call to the corresponding project method if implemented.
        """
        try:
            return self.project.count_emitters(model, index)
        except AttributeError:
            return 1

    def create_cluster(self, model, index):
        """
        redirect the call to the corresponding project method.
        """
        return self.project.create_cluster(model, index)


# noinspection PyMethodMayBeStatic
class Project(object):
    """
    base class of a calculation project.

    a 'calculation project' is a coded set of prescriptions
    on how to get from a set of model parameters to simulated data
    which correspond to provided experimental data.
    the results include a measure of the quality of the simulated data compared to the experimental data.

    each calculation project must derive from this class.
    it must implement the create_domain(), create_cluster(), and create_params() methods.

    the other methods and attributes of this class
    are for passing command line parameters to the calculation modules.
    the attributes should be populated in the constructor of the derived class,
    or (recommended) in the create_project() function of the module.
    it is essential that the attributes are set correctly before calculation.
    """

    ## @var features (dictionary)
    #
    # calculation features and versions supported by the project.
    #
    # the dictionary contains key-value pairs where the key is the name of the feature and the value is a version number.
    # this field conditionally enables new software features that may break backward compatibility.
    # derived projects should fill this field with the supported version
    # upon creation (in their __init__ method or create_project() factory).
    # version 0 (default) means that the feature is disabled.
    #
    # the following features can be enabled (list may be incomplete):
    # as of this version, no optional features are defined.
    #
    # @note rather than introducing new features and, particularly, new versions that rely on this mechanism,
    # developers of generic code should check whether backward compatibility could be achieved in a simpler way,
    # e.g. by implementing additional methods whose default behaviour is the same as in the previous version.
    # in some cases it may be better to refactor all current project code.
    #

    ## @var scans (list of Scan objects)
    # list of experimental or scan files for which calculations are to be run.
    #
    # the list must be populated by calling the add_scan() method.
    # this should be done in the create_project() function, or through the command line arguments.
    #
    # the modulation function is calculated internally.
    # if your scan files contain the modulation function (as opposed to intensity),
    # you must add the files in the create_project() function.
    # the command line does not support loading modulation functions.
    #
    # @c scans must be considered read-only. use project methods to change it.

    ## @var symmetries (list of arbitrary objects)
    # list of symmetries for which calculations are to be run.
    #
    # it is up to the derived class what kind of objects are stored in the list.
    # the recommended kind of objects are dictionaries which hold parameter values,
    # similar to the model dictionaries.
    #
    # the list must be populated by calling the add_symmetry() method.

    ## @var cluster_generator (ClusterGenerator object)
    # provides the cluster generator methods.
    #
    # a project must provide a cluster generator object that is derived from ClusterGenerator.
    # at least the ClusterGenerator.create_cluster method must be implemented.
    # if emitters should be run in parallel, the ClusterGenerator.count_emitters must be implemented as well.
    #
    # the initial value is a LegacyClusterGenerator object
    # which routes cluster calls back to the project for compatibility with older project code.

    ## @var pop_size (int)
    # population size (number of particles) in the particle swarm optimization.
    #
    # by default, the ParticleSwarmHandler chooses the population size depending on the number of parallel processes.
    # you may want to override the default value in cases where the automatic choice is not appropriate, e.g.:
    # - the calculation of a model takes a long time compared to the available computing time.
    # - the calculation of a model spawns many sub-tasks due to complex symmetry.
    # - you want to increase the number of generations compared to the number of particles.
    #
    # the default value is 0.
    #
    # the value can be set by the command line.

    ## @var history_file (string)
    # name of a file containing the results from previous optimization runs.
    # this can be used to resume a swarm optimization where it was interrupted before.
    #
    # the history file is a space-delimited, multi-column, text file.
    # output files of a previous optimization run can be used as is.
    # there must be one column for each model parameter, and one column of R factors.
    # the first row must contain the names of the model parameters.
    # the name of the R factor column must be '_rfac'.
    # additional columns may be included and are ignored.
    #
    # by default, no history is loaded.

    ## @var recalc_history (bool)
    # select whether the R-factors of the historic models are calculated again.
    #
    # this is useful if the historic data was calculated for a different cluster, a different set of parameters,
    # or different experimental data, and if the R-factors of the new optimization may be systematically greater.
    # set this argument to False only if the calculation is a continuation of a previous one
    # without any changes to the code.

    ## @var data_dir
    # directory path to experimental data.
    #
    # the project should load experimental data (scan files) from this path.
    # this attribute receives the --data-dir argument from the command line
    # if the project parses the common arguments (pmsco.set_common_args).
    #
    # it is up to the project to define where to load scan files from.
    # if the location of the files may depend on the machine or user account,
    # the user may want to specify the data path on the command line.

    ## @var output_file (string)
    # file name root for data files produced during the calculation, including intermediate files.

    ## @var timedelta_limit (datetime.timedelta)
    # wall time after which no new calculations should be started.
    #
    # the actual wall time may be longer by the remaining time of running calculations.
    # running calculations will not be aborted.

    ## @var _combined_scan
    # combined raw data from scans.
    # updated by add_scan().

    ## @var _combined_modf
    # combined modulation function from scans.
    # updated by add_scan().

    ## @var files
    # list of all generated data files with metadata.
    # the list is used by model handlers to decide which files can be deleted at run time to save disk space.
    #
    # files.categories_to_delete determines which files can be deleted.

    def __init__(self):
        self.mode = "single"
        self.code = "edac"
        self.features = {}
        self.cluster_format = mc.FMT_EDAC
        self.cluster_generator = LegacyClusterGenerator(self)
        self.scans = []
        self.symmetries = []
        self.pop_size = 0
        self.history_file = ""
        self.recalc_history = True
        self.data_dir = ""
        self.output_file = "pmsco_data"
        self.timedelta_limit = datetime.timedelta(days=1)
        self._combined_scan = None
        self._combined_modf = None
        self.files = files.FileTracker()
        self.handler_classes = {}
        self.handler_classes['model'] = handlers.SingleModelHandler
        self.handler_classes['scan'] = handlers.ScanHandler
        self.handler_classes['symmetry'] = handlers.SymmetryHandler
        self.handler_classes['emitter'] = handlers.EmitterHandler
        self.handler_classes['region'] = handlers.SingleRegionHandler
        self.calculator_class = None

    def create_domain(self):
        """
        create a msc_project.Domain object which defines the allowed range for model parameters.

        this method must be implemented by the actual project class.
        the Domain object must declare all model parameters used in the project.

        @return Domain object
        """
        return None

    def create_params(self, model, index):
        """
        create a Params object given the model parameters and calculation index.

        @param model (dictionary) model parameters to be used in the calculation.

        @param index (named tuple CalcID) calculation index.
        the method should consider only the following attributes:
        @arg @c scan scan index (index into Project.scans)
        @arg @c sym symmetry index (index into Project.symmetries)
        """
        return None

    def clear_scans(self):
        """
        clear scans.

        delete all scans in self.scans and empty the list.

        @return: None
        """
        self.scans = []
        self._combined_scan = None
        self._combined_modf = None

    def add_scan(self, filename, emitter, initial_state, is_modf=False, modf_model=None):
        """
        add the file name of a reference experiment and load it.

        the extension must be one of msc_data.DATATYPES (case insensitive)
        corresponding to the meaning of the columns in the file.

        caution: EDAC can only calculate equidistant, rectangular scans.
        the following scans are currently supported:

        * intensity vs energy at fixed theta, phi
        * intensity vs analyser angle vs energy at normal emission (theta = 0, constant phi)
        * intensity vs theta, phi, or alpha
        * intensity vs theta and phi (hemisphere or hologram scan)

        the method calculates the modulation function if @c is_modf is @c False.
        it also updates @c _combined_scan and @c _combined_modf which may be used as R-factor comparison targets.

        @param filename: (string) file name of the experimental data, possibly including a path.

        @param emitter: (string) chemical symbol of the photo-emitting atom, e.g. "Cu".

        @param initial_state: (string) nl term of the initial state of the atom, e.g. "2p".

        @param is_modf: (bool) declares whether the file contains the modulation function (True),
        or intensity (False, default). in the latter case, the modulation function is calculated internally.

        @param modf_model: (dict) model parameters to be passed to the modulation function.

        @return (Scan) the new scan object (which is also a member of self.scans).

        @todo the accepted scanning schemes should be generalized.
        """
        scan = Scan()
        scan.set_scan(filename, emitter, initial_state)
        self.scans.append(scan)

        if modf_model is None:
            modf_model = {}

        if scan.raw_data is not None:
            if is_modf:
                scan.modulation = scan.raw_data
            else:
                try:
                    scan.modulation = self.calc_modulation(scan.raw_data, modf_model)
                except ValueError:
                    logger.error("error calculating the modulation function of experimental data.")
                    scan.modulation = None
        else:
            scan.modulation = None

        if scan.raw_data is not None:
            if self._combined_scan is not None:
                dtype = md.common_dtype((self._combined_scan, scan.raw_data))
                self._combined_scan = np.hstack((self._combined_scan, md.restructure_data(scan.raw_data, dtype)))
            else:
                self._combined_scan = scan.raw_data.copy()
        else:
            self._combined_scan = None

        if scan.modulation is not None:
            if self._combined_modf is not None:
                dtype = md.common_dtype((self._combined_modf, scan.modulation))
                self._combined_modf = np.hstack((self._combined_modf, md.restructure_data(scan.modulation, dtype)))
            else:
                self._combined_modf = scan.modulation.copy()
        else:
            self._combined_modf = None

        return scan
|
||||
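    # usage sketch (the file name is illustrative; the extension selects the data format):
    #
    #   project.add_scan(filename="experiment/cu_2p.etpi", emitter="Cu", initial_state="2p")
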
    def clear_symmetries(self):
        """
        clear symmetries.

        delete all symmetries in self.symmetries and empty the list.

        @return: None
        """
        self.symmetries = []

    def add_symmetry(self, symmetry):
        """
        add a symmetry to the list of symmetries.

        this class declares the list of symmetries.
        it does not define what should be in the list of symmetries.
        however, there must be an entry for each symmetry to be calculated.
        if the list is empty, no calculation will be executed.

        @attention initially, the symmetries list is empty.
        your project needs to add at least one symmetry.
        otherwise, no calculation will be executed.

        @param symmetry: it is up to the derived project class to specify and interpret the data stored here.
        it is recommended to store a dictionary with symmetry parameters similar to the model parameters.

        @return: None
        """
        self.symmetries.append(symmetry)

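    # usage sketch (the dictionary key 'rotation' is illustrative; the meaning of the
    # symmetry data is defined entirely by the derived project):
    #
    #   for angle in [0, 120, 240]:
    #       project.add_symmetry({'rotation': angle})
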
    def set_output(self, filename):
        """
        set the base name of the output files.
        """
        self.output_file = filename

    def set_timedelta_limit(self, timedelta):
        """
        set the wall-time limit.

        @param timedelta: (datetime.timedelta) allowed wall time.
        """
        self.timedelta_limit = timedelta

    def combine_symmetries(self, parent_task, child_tasks):
        """
        combine the results of the different symmetries into one result. calculate the modulation function.

        the symmetry results are read from the file system using the indices defined by the child_tasks,
        and the combined result is written to the file system with the index defined by parent_task.

        by default, this method adds all symmetries with equal weight.

        @param parent_task: (CalculationTask) parent task of the symmetry tasks.
        the method must write the results to the files indicated
        by the @c result_filename and @c modf_filename attributes.

        @param child_tasks: (sequence of CalculationTask) tasks which identify each symmetry.
        the method must read the source data from the files
        indicated by the @c result_filename attributes.
        the sequence is sorted by task ID, i.e., essentially, by symmetry index.

        @return: None

        @raise IndexError if child_tasks is empty.

        @raise KeyError if a filename is missing.

        @note the weights of the symmetries (in derived classes) can be part of the optimizable model parameters.
        the model parameters are available as the @c model attribute of the calculation tasks.
        """

        result_data = None
        for task in child_tasks:
            data = md.load_data(task.result_filename)
            if result_data is not None:
                result_data['i'] += data['i']
            else:
                result_data = data

        md.save_data(parent_task.result_filename, result_data)

        # todo: the handling of missing modulation functions may need some cleanup
        if self.scans[parent_task.id.scan].modulation is not None:
            result_modf = self.calc_modulation(result_data, parent_task.model)
            md.save_data(parent_task.modf_filename, result_modf)
        else:
            parent_task.modf_filename = ""

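    # a hedged sketch of a weighted override in a derived project.
    # the 'wsym0', 'wsym1', ... model parameters are hypothetical names; they would have
    # to be declared in the project's Domain so the optimizer can vary them:
    #
    #   def combine_symmetries(self, parent_task, child_tasks):
    #       result_data = None
    #       for task in child_tasks:
    #           data = md.load_data(task.result_filename)
    #           weight = parent_task.model['wsym{0}'.format(task.id.sym)]
    #           if result_data is not None:
    #               result_data['i'] += weight * data['i']
    #           else:
    #               result_data = data
    #               result_data['i'] *= weight
    #       md.save_data(parent_task.result_filename, result_data)
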
    def combine_emitters(self, parent_task, child_tasks):
        """
        combine the results of the different emitters into one result. calculate the modulation function.

        the emitter results are read from the file system using the indices defined by the child_tasks,
        and the combined result is written to the file system with the index defined by parent_task.

        by default, this method adds all emitters with equal weight.

        sub-classes may override this method and implement expansion of equivalent emitters,
        unequal weights, etc.

        @param parent_task: (CalculationTask) parent task of the emitter tasks.
        the method must write the results to the files indicated
        by the @c result_filename and @c modf_filename attributes.

        @param child_tasks: (sequence of CalculationTask) tasks which identify each emitter.
        the method must read the source data from the files
        indicated by the @c result_filename attributes.
        the sequence is sorted by task ID, i.e., essentially, by the emitter index.

        @return: None

        @raise IndexError if child_tasks is empty.

        @raise KeyError if a filename is missing.

        @note the weights of the emitters (in derived classes) can be part of the optimizable model parameters.
        the model parameters are available as the @c model attribute of the calculation tasks.
        """

        result_data = None
        for task in child_tasks:
            data = md.load_data(task.result_filename)
            if result_data is not None:
                result_data['i'] += data['i']
            else:
                result_data = data

        md.save_data(parent_task.result_filename, result_data)

        # todo: the handling of missing modulation functions may need some cleanup
        if self.scans[parent_task.id.scan].modulation is not None:
            result_modf = self.calc_modulation(result_data, parent_task.model)
            md.save_data(parent_task.modf_filename, result_modf)
        else:
            parent_task.modf_filename = ""

    def combine_scans(self, parent_task, child_tasks):
        """
        combine the results of the different scans into one result, for intensity and modulation.

        the scan results are read from the file system using the indices defined by the child_tasks,
        and the combined result is written to the file system with the index defined by parent_task.

        the datasets of the scans are appended.
        this is done for intensity and modulation data independently.

        @param parent_task: (CalculationTask) parent task of the scan tasks.
        the method must write the results to the files indicated
        by the @c result_filename and @c modf_filename attributes.

        @param child_tasks: (sequence of CalculationTask) tasks which identify each scan.
        the method must read the source data from the files
        indicated by the @c result_filename attributes.
        the sequence is sorted by task ID, i.e., essentially, by scan index.

        @return: None

        @raise IndexError if child_tasks is empty.

        @raise KeyError if a filename is missing.
        """

        # intensity
        try:
            stack1 = [md.load_data(task.result_filename) for task in child_tasks]
        except (KeyError, IOError):
            parent_task.result_filename = ""
        else:
            dtype = md.common_dtype(stack1)
            stack2 = [md.restructure_data(data, dtype) for data in stack1]
            result_data = np.hstack(tuple(stack2))
            md.save_data(parent_task.result_filename, result_data)

        # modulation
        try:
            stack1 = [md.load_data(task.modf_filename) for task in child_tasks]
        except (KeyError, IOError):
            parent_task.modf_filename = ""
        else:
            dtype = md.common_dtype(stack1)
            stack2 = [md.restructure_data(data, dtype) for data in stack1]
            result_modf = np.hstack(tuple(stack2))
            md.save_data(parent_task.modf_filename, result_modf)

    # noinspection PyUnusedLocal
    def calc_modulation(self, data, model):
        """
        calculate the project-dependent modulation function.

        the modulation function of I(x) is (I(x) - S(x)) / S(x),
        where S(x) is a smooth copy of I(x).

        by default, the modulation function is calculated by data.calc_modfunc_loess().
        override this method in your project to use a different modulation function.

        @param data structured numpy.ndarray in EI, ETPI, or ETPAI format.
        can contain a one- or multi-dimensional scan.
        the scan coordinates must be on a rectangular or hemispherical grid.
        for maximum compatibility, the array should be sorted,
        though for the default calc_modfunc_loess() function this is not required.

        if data contains a hemispherical scan, the phi dimension is ignored,
        i.e. the modulation function is calculated on a phi-average.

        @param model: (dict) model parameters of the calculation task.
        can be used to pass parameters from the project.

        @return copy of the data array with the modulation function in the 'i' column.
        """

        return md.calc_modfunc_loess(data)

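    # a minimal sketch of an override, assuming a one-dimensional energy scan whose
    # energy column is named 'e'; the cubic polynomial background is an arbitrary
    # choice of smoothing, not the project's prescribed method:
    #
    #   def calc_modulation(self, data, model):
    #       modf = data.copy()
    #       coef = np.polyfit(data['e'], data['i'], 3)
    #       smooth = np.polyval(coef, data['e'])
    #       modf['i'] = (data['i'] - smooth) / smooth
    #       return modf
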
    def calc_rfactor(self, task):
        """
        calculate the R-factor of a task.

        the method calculates the R-factor over the combined scans.
        the corresponding experimental data is taken from self._combined_modf.

        this method is called by the model handler.

        by default, the R-factor is calculated by data.rfactor() over the combined scans.
        override this method in your project to use a different R-factor algorithm.

        @param task: (CalculationTask) a model task.

        @return (float) calculated R-factor.
        """
        task_data = md.load_data(task.modf_filename)
        result_r = md.rfactor(self._combined_modf, task_data)

        return result_r

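    # a hedged sketch of an override using a plain normalized mean-square deviation.
    # it assumes that the experimental and calculated modulation arrays are aligned
    # point by point and share the 'i' column:
    #
    #   def calc_rfactor(self, task):
    #       exp_modf = self._combined_modf['i']
    #       calc_modf = md.load_data(task.modf_filename)['i']
    #       return np.sum((exp_modf - calc_modf) ** 2) / np.sum(exp_modf ** 2 + calc_modf ** 2)
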
    def cleanup(self):
        """
        delete unwanted files at the end of a project.

        @return: None
        """
        self.files.delete_files()

909
pmsco/swarm.py
Normal file
@@ -0,0 +1,909 @@
"""
@package pmsco.swarm
particle swarm optimization handler.

the module starts multiple MSC calculations and optimizes the model parameters
according to the particle swarm optimization algorithm.

Particle swarm optimization adapted from
D. A. Duncan et al., Surface Science 606, 278 (2012)

@author Matthias Muntwiler, matthias.muntwiler@psi.ch

@copyright (c) 2015 by Paul Scherrer Institut @n
Licensed under the Apache License, Version 2.0 (the "License"); @n
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
"""

from __future__ import division
import copy
import os
import datetime
import logging
import numpy as np
import handlers
from helpers import BraceMessage as BMsg

logger = logging.getLogger(__name__)


CONSTRAIN_MODES = {'re-enter', 'bounce', 'scatter', 'stick', 'expand'}

class Population(object):
    """
    particle swarm population.
    """

    ## @var size_req
    # requested number of particles.
    # read-only. call setup() to change this attribute.

    ## @var model_start
    # (dict) initial model parameters.
    # read-only. call setup() to change this attribute.

    ## @var model_min
    # (dict) low limits of the model parameters.
    # read-only. call setup() to change this attribute.

    ## @var model_max
    # (dict) high limits of the model parameters.
    # if min == max, the parameter is kept constant.
    # read-only. call setup() to change this attribute.

    ## @var model_step
    # (dict) initial velocity (difference between two steps) of the particle.
    # read-only. call setup() to change this attribute.

    ## @var friends
    # number of other particles that each particle consults for the global best fit.
    # default = 3.

    ## @var momentum
    # momentum of the particle.
    # default = 0.689343.

    ## @var attract_local
    # preference for returning to the local best fit.
    # default = 1.92694.

    ## @var attract_global
    # preference for heading towards the global best fit.
    # default = 1.92694.

    ## @var generation
    # generation number. the counter is incremented by advance_population().
    # initial value = 0.

    ## @var model_count
    # model number.
    # the counter is incremented by advance_particle() each time a particle position is changed.
    # initial value = 0.

    ## @var pos
    # (numpy.ndarray) current positions of each particle.
    #
    # the column names include the names of the model parameters, taken from domain.start,
    # and the special names @c '_particle', @c '_gen', @c '_model', @c '_rfac'.
    # the special fields have the following meanings:
    #
    # * @c '_particle': index of the particle in the array.
    # the particle index is used to match a calculation result and its original particle.
    # it must be preserved during the calculation process.
    #
    # * @c '_gen': generation number.
    # the generation number counts the number of calls to advance_population().
    # this field is not used internally.
    # the first population is generation 0.
    #
    # * @c '_model': model number.
    # the model number counts the number of calls to advance_particle().
    # the field is filled with the current value of model_count whenever the position is changed.
    # this field is not used internally.
    # the model handlers use it to derive their model ID.
    #
    # * @c '_rfac': calculated R-factor for this position.
    # this field is meaningful in the best and results arrays only,
    # where it is set by the add_result() method.
    # in the pos and vel arrays, the field value is arbitrary.
    #
    # @note if you read a single element, e.g. pos[0], from the array, you will get a numpy.void object.
    # this object is a <em>view</em> of the original array item.

    ## @var vel
    # (numpy.ndarray) current velocities of each particle.
    # the structure is the same as for the pos array.

    ## @var best
    # (numpy.ndarray) best positions found by each particle so far.
    # the structure is the same as for the pos array.

    ## @var results
    # (numpy.ndarray) all positions and resulting R-factors calculated.
    # the structure is the same as for the pos array.

    ## @var _hold_once
    # (bool) hold the population once during the next update.
    # if _hold_once is True, advance_population() will skip the update process once.
    # this flag is set by setup() because it sets up a valid initial population.
    # the caller then doesn't have to care whether to skip advance_population() after setup.

    def __init__(self):
        """
        initialize the population object.
        """
        self.size_req = 0
        self.model_start = {}
        self.model_min = {}
        self.model_max = {}
        self.model_step = {}

        self.friends = 3
        self.momentum = 0.689343
        self.attract_local = 1.92694
        self.attract_global = 1.92694
        self.position_constrain_mode = 'default'
        self.velocity_constrain_mode = 'default'

        self.generation = 0
        self.model_count = 0
        self._hold_once = False

        self.pos = None
        self.vel = None
        self.best = None
        self.results = None

    def pos_gen(self):
        """
        generator for dictionaries of the pos array.

        the generator can be used to loop over the array.
        on each iteration, it yields a dictionary of the position at the current index.
        for example,
        @code{.py}
        for pos in pop.pos_gen():
            print pos['_particle'], pos['_rfac']
        @endcode
        """
        return ({name: pos[name] for name in pos.dtype.names} for pos in self.pos)

    def vel_gen(self):
        """
        generator for dictionaries of the vel array.

        @see pos_gen() for details.
        """
        return ({name: vel[name] for name in vel.dtype.names} for vel in self.vel)

    def best_gen(self):
        """
        generator for dictionaries of the best array.

        @see pos_gen() for details.
        """
        return ({name: best[name] for name in best.dtype.names} for best in self.best)

    def results_gen(self):
        """
        generator for dictionaries of the results array.

        @see pos_gen() for details.
        """
        return ({name: results[name] for name in results.dtype.names} for results in self.results)

    @staticmethod
    def get_model_dtype(model_params):
        """
        get the numpy array data type for model parameters and swarm control variables.

        @param model_params: dictionary of model parameters or list of parameter names.

        @return: dtype for use with numpy array constructors.
        this is a sorted list of (name, type) tuples.
        """
        dt = []
        for key in model_params:
            dt.append((key, 'f4'))
        dt.append(('_particle', 'i4'))
        dt.append(('_gen', 'i4'))
        dt.append(('_model', 'i4'))
        dt.append(('_rfac', 'f4'))
        dt.sort(key=lambda t: t[0].lower())
        return dt

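    # example: for model_params = {'dAB': 2.1, 'V0': 10.0} this returns
    # [('_gen', 'i4'), ('_model', 'i4'), ('_particle', 'i4'), ('_rfac', 'f4'),
    #  ('dAB', 'f4'), ('V0', 'f4')]
    # because the tuples are sorted case-insensitively by field name.
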
    def setup(self, size, domain, history_file="", recalc_history=True):
        """
        set up the population arrays seeded with previous results and the start model.

        * set the population parameters and allocate the data arrays.
        * set one particle to the initial guess, and the others to positions from a previous results file.
        if the file contains fewer particles than allocated, the remaining particles are initialized randomly.

        seeding from a history file can be used to continue an interrupted optimization process.
        the method loads the results into the best and position arrays,
        and updates the other arrays and variables
        so that the population can be advanced and calculated.

        by default, the calculations of the previous parameters are repeated.
        this is recommended whenever the code, the experimental input, or the project arguments change
        because all of them may have an influence on the R-factor.

        re-calculation can be turned off by setting recalc_history to false.
        this is recommended only if the calculation is a direct continuation of a previous one
        without any changes to the code or input.
        in that case, the previous results are marked as generation -1 with a negative model number.
        upon the first iteration before running the scattering calculations,
        new parameters will be derived by the swarm algorithm.

        @param size: requested number of particles.

        @param domain: definition of initial and limiting model parameters
        expected by the cluster and parameters functions.

        @arg domain.start: initial guess.
        @arg domain.min: minimum values allowed.
        @arg domain.max: maximum values allowed. if min == max, the parameter is kept constant.
        @arg domain.step: initial velocity (difference between two steps) for the particle swarm.

        @param history_file: name of the results history file.
        this can be a file created by the @ref save_array or @ref save_results methods.
        the columns of the plain-text file contain model parameters and
        the _rfac values of a previous calculation.
        additional columns are ignored.
        the first row must contain the column names.
        if a parameter column is missing,
        the corresponding parameter is seeded with a random value within the domain.
        in this case, a warning is added to the log file.

        the number of rows does not need to be equal to the population size.
        if it is lower, the remaining particles are initialized randomly.
        if it is higher, only the ones with the lowest R-factors are used.
        results with R >= 1.0 are ignored in any case.

        @param recalc_history: select whether the R-factors of the historic models are calculated again.
        this is useful if the historic data was calculated for a different cluster, different set of parameters,
        or different experimental data, and if the R-factors of the new optimization may be systematically greater.
        set this argument to False only if the calculation is a continuation of a previous one
        without any changes to the code.

        @return: None
        """
        self.size_req = size
        self.model_start = domain.start
        self.model_min = domain.min
        self.model_max = domain.max
        self.model_step = domain.step

        # allocate arrays
        dt = self.get_model_dtype(self.model_start)
        self.pos = np.zeros(self.size_req, dtype=dt)
        self.vel = np.zeros(self.size_req, dtype=dt)
        self.results = np.empty((0), dtype=dt)

        # randomize population
        self.generation = 0
        self.randomize()
        self.pos['_particle'] = np.arange(self.size_req)
        self.pos['_gen'] = self.generation
        self.pos['_model'] = np.arange(self.size_req)
        self.pos['_rfac'] = 2.1
        self.model_count = self.size_req

        # add previous results
        if history_file:
            hist = np.genfromtxt(history_file, names=True)
            hist = hist[hist['_rfac'] < 1.0]
            hist.sort(order='_rfac')
            hist_size = min(hist.shape[0], self.size_req - 1)

            discarded_fields = {'_particle', '_gen', '_model'}
            source_fields = set(hist.dtype.names) - discarded_fields
            dest_fields = set(self.pos.dtype.names) - discarded_fields
            common_fields = source_fields & dest_fields
            if len(common_fields) < len(dest_fields):
                logger.warning(BMsg("missing columns in history file {hf} default to random seed value.",
                                    hf=history_file))
            for name in common_fields:
                self.pos[name][0:hist_size] = hist[name][0:hist_size]

            self.pos['_particle'] = np.arange(self.size_req)
            logger.info(BMsg("seeding swarm population with {hs} models from history file {hf}.",
                             hs=hist_size, hf=history_file))
            if recalc_history:
                self.pos['_gen'] = self.generation
                self.pos['_model'] = np.arange(self.size_req)
                self.pos['_rfac'] = 2.1
                logger.info("historic models will be re-calculated.")
            else:
                self.pos['_gen'][0:hist_size] = -1
                self.pos['_model'][0:hist_size] = -np.arange(hist_size) - 1
                self.model_count = self.size_req - hist_size
                self.pos['_model'][hist_size:] = np.arange(self.model_count)
                logger.info("historic models will not be re-calculated.")

        # seed the last particle with the start parameters
        self.seed(self.model_start, index=-1)

        # initialize the best array
        self.best = self.pos.copy()

        self._hold_once = True

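    # usage sketch (a Domain-like object with start/min/max/step dictionaries is assumed,
    # e.g. the one returned by Project.create_domain; run_calculation is a hypothetical helper):
    #
    #   pop = Population()
    #   pop.setup(size=20, domain=project.create_domain())
    #   pop.advance_population()          # held once after setup, keeps the seeded positions
    #   for pos in pop.pos_gen():
    #       rfac = run_calculation(pos)   # e.g. via the project's calc_rfactor
    #       pop.add_result(pos, rfac)
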
    def randomize(self, pos=True, vel=True):
        """
        initialize the population with random values.

        the position array is filled with random values (uniform distribution) from the parameter domain.
        velocity values are chosen randomly between -1/8 and +1/8 times the width (max - min) of the parameter domain.

        the method does not update the particle info fields.

        @param pos: randomize positions. if False, the positions are not changed.
        @param vel: randomize velocities. if False, the velocities are not changed.
        """
        if pos:
            for key in self.model_start:
                self.pos[key] = ((self.model_max[key] - self.model_min[key]) *
                                 np.random.random_sample(self.pos.shape) + self.model_min[key])
        if vel:
            for key in self.model_start:
                self.vel[key] = ((self.model_max[key] - self.model_min[key]) *
                                 (np.random.random_sample(self.pos.shape) - 0.5) / 4.0)

    def seed(self, params, index=0):
        """
        set one of the particles to the specified seed values.

        the method does not update the particle info fields.

        @param params: dictionary of model parameters.
        the keys must match the ones of domain.start.

        @param index: index of the particle that is seeded.
        the index must be in the allowed range of the self.pos array.
        0 is the first, -1 the last particle.
        """
        for key in params:
            self.pos[key][index] = params[key]

    def update_particle_info(self, index, inc_model=True):
        """
        set the internal particle info fields.

        the fields @c _particle, @c _gen, and @c _model are updated with the current values.
        @c _rfac is set to the default value 2.1.

        this method must be called after each change of particle position.

        @param index: (int) particle index.

        @param inc_model: (bool) if True, increment the model count afterwards.

        @return: None
        """
        self.pos['_particle'][index] = index
        self.pos['_gen'][index] = self.generation
        self.pos['_model'][index] = self.model_count
        self.pos['_rfac'][index] = 2.1

        if inc_model:
            self.model_count += 1

    def advance_population(self):
        """
        advance the population by one step.

        this method increments the generation number and calls advance_particle()
        for each particle of the population.
        if the _hold_once flag is set (as it is after setup()), the update is skipped once
        and the flag is cleared, so that the seeded population is calculated first.

        @return: None
        """
        if not self._hold_once:
            self.generation += 1
            for index, __ in enumerate(self.pos):
                self.advance_particle(index)
        self._hold_once = False

    def advance_particle(self, index):
        """
        advance a particle by one step.

        @param index: index of the particle in the population.
        """

        # note: the following two identifiers are views;
        # assignment will modify the original array
        pos = self.pos[index]
        vel = self.vel[index]
        # best fit that this individual has seen
        xl = self.best[index]
        # best fit that a group of others have seen
        xg = self.best_friend(index)

        for key in self.model_start:
            # update the velocity
            dxl = xl[key] - pos[key]
            dxg = xg[key] - pos[key]
            pv = np.random.random()
            pl = np.random.random()
            pg = np.random.random()
            vel[key] = (self.momentum * pv * vel[key] +
                        self.attract_local * pl * dxl +
                        self.attract_global * pg * dxg)
            pos[key], vel[key], self.model_min[key], self.model_max[key] = \
                self.constrain_velocity(pos[key], vel[key], self.model_min[key], self.model_max[key],
                                        self.velocity_constrain_mode)
            # update the position
            pos[key] += vel[key]
            pos[key], vel[key], self.model_min[key], self.model_max[key] = \
                self.constrain_position(pos[key], vel[key], self.model_min[key], self.model_max[key],
                                        self.position_constrain_mode)

        self.update_particle_info(index)

    @staticmethod
    def constrain_velocity(_pos, _vel, _min, _max, _mode='default'):
        """
        constrain a velocity to the given bounds.

        @param _pos: current position of the particle.

        @param _vel: new velocity of the particle, i.e. the distance to move.

        @param _min: lower position boundary.

        @param _max: upper position boundary.
        _max must be greater than or equal to _min.

        @param _mode: what to do if a boundary constraint is violated.
        reserved for future use. should be set to 'default'.

        @return: tuple (new position, new velocity, new lower boundary, new upper boundary).
        in the current implementation only the velocity may change.
        however, in future versions any of these values may change.
        """
        d = abs(_max - _min) / 2.0
        if d > 0.0:
            while abs(_vel) >= d:
                _vel /= 2.0
        else:
            _vel = 0.0
        return _pos, _vel, _min, _max

    @staticmethod
    def constrain_position(_pos, _vel, _min, _max, _mode='default'):
        """
        constrain a position to the given bounds.

        @param _pos: new position of the particle, possibly out of bounds.

        @param _vel: velocity of the particle, i.e. the distance from the previous position.
        _vel must be lower than _max - _min.

        @param _min: lower boundary.

        @param _max: upper boundary.
        _max must be greater than or equal to _min.

        @param _mode: what to do if a boundary constraint is violated:
        @arg 're-enter': re-enter from the opposite side of the parameter interval.
        @arg 'bounce': fold the motion vector at the boundary and move the particle back into the domain.
        @arg 'scatter': place the particle at a random place between its old position and the violated boundary.
        @arg 'stick': place the particle at the violated boundary.
        @arg 'expand': move the boundary so that the particle fits.
        @arg 'random': place the particle at a random position between the lower and upper boundaries.
        @arg 'default': the default mode is 'bounce'. this may change in future versions.

        @return: tuple (new position, new velocity, new lower boundary, new upper boundary).
        depending on the mode, any of these values may change.
        the velocity is adjusted to be consistent with the change of position.
        """
        _rng = max(_max - _min, 0.0)
        _old = _pos - _vel

        # prevent undershoot
        if _vel > 0.0 and _pos < _min:
            _pos = _min
            _vel = _pos - _old
        if _vel < 0.0 and _pos > _max:
            _pos = _max
            _vel = _pos - _old

        assert abs(_vel) <= _rng, \
            "velocity: pos = {0}, min = {1}, max = {2}, vel = {3}, _rng = {4}".format(_pos, _min, _max, _vel, _rng)
        assert (_vel >= 0 and _pos >= _min) or (_vel <= 0 and _pos <= _max), \
            "undershoot: pos = {0}, min = {1}, max = {2}, vel = {3}, _rng = {4}".format(_pos, _min, _max, _vel, _rng)

        if _rng > 0.0:
            while _pos > _max:
                if _mode == 're-enter':
                    _pos -= _rng
                elif _mode == 'bounce' or _mode == 'default':
                    _pos = _max - (_pos - _max)
                    _vel = -_vel
                elif _mode == 'scatter':
                    _pos = _old + (_max - _old) * np.random.random()
                    _vel = _pos - _old
                elif _mode == 'stick':
                    _pos = _max
                    _vel = _pos - _old
                elif _mode == 'expand':
                    _max = _pos
                elif _mode == 'random':
                    _pos = _min + _rng * np.random.random()
                    _vel = _pos - _old
                else:
                    raise ValueError('invalid constrain mode')

            while _pos < _min:
                if _mode == 're-enter':
                    _pos += _rng
                elif _mode == 'bounce' or _mode == 'default':
                    _pos = _min - (_pos - _min)
                    _vel = -_vel
                elif _mode == 'scatter':
                    _pos = _old + (_min - _old) * np.random.random()
                    _vel = _pos - _old
                elif _mode == 'stick':
                    _pos = _min
                    _vel = _pos - _old
                elif _mode == 'expand':
                    _min = _pos
                elif _mode == 'random':
                    _pos = _min + _rng * np.random.random()
                    _vel = _pos - _old
                else:
                    raise ValueError('invalid constrain mode')
        else:
            _pos = _max
            _vel = 0.0

        return _pos, _vel, _min, _max

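    # worked example for the default 'bounce' mode: with _min = 0.0, _max = 1.0,
    # an old position of 0.9 and a velocity of 0.3 give _pos = 1.2, which overshoots
    # the upper boundary; the position is folded back to _max - (_pos - _max) = 0.8
    # and the velocity is inverted to -0.3.
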
    # noinspection PyUnusedLocal
    def best_friend(self, index):
        """
        select the best fit out of a random set of particles.

        @return the "best friend": the record with the lowest R-factor among self.friends randomly chosen particles.
        """
        friends = np.random.choice(self.best, self.friends, replace=False)
        index = np.argmin(friends['_rfac'])
        return friends[index]

    def add_result(self, particle, rfac):
        """
        add a calculated particle to the results array, and update the best fit array.

        @param particle: dictionary of model parameters and particle values.
        the keys must correspond to the columns of the pos array,
        i.e. the names of the model parameters plus the _rfac, _particle, and _model fields.

        @param rfac: calculated R-factor.
        the R-factor is written to the '_rfac' field.

        @return better (bool): True if the new R-factor is better than the particle's previous best mark.
        """
        particle['_rfac'] = rfac
        l = [particle[n] for n in self.results.dtype.names]
        t = tuple(l)
        a = np.asarray(t, dtype=self.results.dtype)
        self.results = np.append(self.results, a)
        index = particle['_particle']
        better = particle['_rfac'] < self.best['_rfac'][index]
        if better:
            self.best[index] = a

        return better

    def is_converged(self, tol=0.01):
        """
        check whether the population has converged.

        convergence is reached when the R-factors of the N latest results
        do not vary by more than tol, where N is the size of the population.

        @param tol: maximum difference allowed between the greatest and lowest R-factor in the population.
        """
        nres = self.results.shape[0]
        npop = self.pos.shape[0]
        if nres >= npop:
            rfac1 = np.min(self.results['_rfac'][-npop:])
            rfac2 = np.max(self.results['_rfac'][-npop:])
            converg = rfac2 - rfac1 < tol
            return converg
        else:
            return False

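    # example: with a population of 20 particles and tol = 0.01, the population is
    # considered converged as soon as the 20 most recent R-factors span less than 0.01.
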
    def save_array(self, filename, array):
        """
        save a population array to a text file.

        the columns are space-delimited.
        the first line contains the column names.

        @param filename: name of the destination file, optionally including a path.

        @param array: population array to save.
        must be one of self.pos, self.vel, self.best, self.results.
        """
        header = " ".join(self.results.dtype.names)
        np.savetxt(filename, array, fmt='%g', header=header)

    def load_array(self, filename, array):
        """
        load a population array from a text file.

        the array to load must be compatible with the current population
        (same number of rows, same columns).
        the first row must contain the column names.
        the ordering of the columns may be different.
        the returned array is ordered according to the array argument.

        @param filename: name of the source file, optionally including a path.

        @param array: population array to load.
        must be one of self.pos, self.vel, self.results.

        @return array with loaded data.
        this may be the same instance as on input.

        @raise AssertionError if the shapes of the file data and the array differ.
        """
        data = np.genfromtxt(filename, names=True)
        assert data.shape == array.shape
        for name in data.dtype.names:
            array[name] = data[name]
        return array

    def save_population(self, base_filename):
        """
        save the population arrays to a set of text files.

        the file name extensions are .pos, .vel, and .best.
        """
        self.save_array(base_filename + ".pos", self.pos)
        self.save_array(base_filename + ".vel", self.vel)
        self.save_array(base_filename + ".best", self.best)

    def load_population(self, base_filename):
        """
        load the population arrays from a set of previously saved text files.
        this can be used to continue an optimization job.

        the file name extensions are .pos, .vel, and .best.
        the files must have the same format as produced by save_population.
        the files must have the same number of rows.
        """
        self.pos = self.load_array(base_filename + ".pos", self.pos)
        self.vel = self.load_array(base_filename + ".vel", self.vel)
        self.best = self.load_array(base_filename + ".best", self.best)

    def save_results(self, filename):
        """
        save the complete list of calculation results.
        """
        self.save_array(filename, self.results)


class ParticleSwarmHandler(handlers.ModelHandler):
    """
    model handler which implements the particle swarm optimization algorithm.
    """

    ## @var _pop (Population)
    # holds the population object.

    ## @var _pop_size (int)
    # number of particles in the swarm.

    ## @var _outfile (file)
    # output file for model parameters and R-factors.
    # the file is open during calculations.
    # each calculation result adds one line.

    ## @var _model_time (timedelta)
    # estimated CPU time to calculate one model.
    # this value is the maximum time measured of the completed calculations.
    # it is used to determine when the optimization should be finished so that the time limit is not exceeded.

    ## @var _converged (bool)
    # indicates that the population has converged.
    # convergence is detected by calling Population.is_converged().
    # once convergence has been reached, this flag is set, and further convergence tests are skipped.

    ## @var _timeout (bool)
    # indicates that the handler has run out of time,
    # i.e. time is up before convergence has been reached.
    # if _timeout is True, create_tasks() will not create further tasks,
    # and add_result() will signal completion when the _pending_tasks queue becomes empty.

    ## @var _invalid_limit (int)
    # maximum tolerated number of invalid calculations.
    #
    # if the number of invalid calculations (self._invalid_count) exceeds this limit,
    # the optimization is aborted.
    # the variable is initialized by self.setup() to 10 times the population size.

    def __init__(self):
        super(ParticleSwarmHandler, self).__init__()
        self._pop = None
        self._pop_size = 0
        self._outfile = None
        self._model_time = datetime.timedelta()
        self._converged = False
        self._timeout = False
        self._invalid_limit = 10

    def setup(self, project, slots):
        """
        initialize the particle swarm and open an output file.

        the population size is set to project.pop_size if it is defined (with a minimum of 4).
        otherwise, it defaults to <code>max(2 * slots, 4)</code>.

        for good efficiency the population size (number of particles) should be
        greater than or equal to the number of available processing slots,
        otherwise the next generation is created before all particles have been calculated,
        which may slow down convergence.

        if calculations take a long time compared to the available computation time
        or spawn a lot of sub-tasks due to complex symmetry,
        and you prefer to allow for a good number of generations,
        you should override the population size.

        @param project: project instance.

        @param slots: number of calculation processes available through MPI.

        @return: None
        """
        super(ParticleSwarmHandler, self).setup(project, slots)

        _min_size = 4
        if project.pop_size:
            self._pop_size = max(project.pop_size, _min_size)
        else:
            self._pop_size = max(self._slots * 2, _min_size)
        self._pop = Population()
        self._pop.setup(self._pop_size, self._project.create_domain(), self._project.history_file,
                        self._project.recalc_history)
        self._invalid_limit = self._pop_size * 10

        self._outfile = open(self._project.output_file + ".dat", "w")
        self._outfile.write("# ")
        self._outfile.write(" ".join(self._pop.results.dtype.names))
        self._outfile.write("\n")

        return None

    def cleanup(self):
        self._outfile.close()
        super(ParticleSwarmHandler, self).cleanup()

    def create_tasks(self, parent_task):
        """
        develop the particle population and create a calculation task per particle.

        this method advances the population by one step.
        it generates one task for each particle whose model number is non-negative.
        negative model numbers indicate that the particle is used for seeding
        and does not need to be calculated in the first generation.

        if the time limit is approaching, no new tasks are created.

        the process loop calls this method every time the length of the task queue drops
        below the number of calculation processes (slots).
        this means in particular that a population will not be completely calculated
        before the next generation starts.
        for efficiency reasons, we do not wait until a population is complete.
        this will cause a certain mixing of generations and slow down convergence
        because the best peer position in the generation may not be known yet.
        the effect can be reduced by making the population larger than the number of processes.

        @return list of generated tasks. empty list if the optimization has converged (see Population.is_converged()).
        """

        super(ParticleSwarmHandler, self).create_tasks(parent_task)

        # this is the top-level handler, so we expect just one parent: root.
        parent_id = parent_task.id
        assert parent_id == (-1, -1, -1, -1, -1)
        self._parent_tasks[parent_id] = parent_task

        time_pending = self._model_time * len(self._pending_tasks)
        time_avail = (self.datetime_limit - datetime.datetime.now()) * max(self._slots, 1)

        out_tasks = []
        if not self._timeout and not self._converged:
            self._pop.advance_population()

            for pos in self._pop.pos_gen():
                time_pending += self._model_time
                if time_pending > time_avail:
                    self._timeout = True
                    logger.info("time limit reached")
                    break

                if pos['_model'] >= 0:
                    new_task = parent_task.copy()
                    new_task.parent_id = parent_id
                    new_task.model = pos
                    new_task.change_id(model=pos['_model'])

                    child_id = new_task.id
                    self._pending_tasks[child_id] = new_task
                    out_tasks.append(new_task)

        return out_tasks

    def add_result(self, task):
        """
        calculate the R-factor of the result and add it to the results list of the population.

        * save the current population.
        * append the result to the result output file.
        * update the execution time statistics.
        * remove temporary files if requested.
        * check whether the population has converged.

        @return parent task (CalculationTask) if the optimization is complete (converged or timed out),
        @c None otherwise.
        """
        super(ParticleSwarmHandler, self).add_result(task)

        self._complete_tasks[task.id] = task
        del self._pending_tasks[task.id]
        parent_task = self._parent_tasks[task.parent_id]

        rfac = 1.0
        if task.result_valid:
            try:
                rfac = self._project.calc_rfactor(task)
            except ValueError:
                task.result_valid = False
                self._invalid_count += 1
                logger.warning(BMsg("calculation of model {0} resulted in an undefined R-factor.", task.id.model))

        task.model['_rfac'] = rfac
        self._pop.add_result(task.model, rfac)
        self._pop.save_population(self._project.output_file + ".pop")

        if self._outfile:
            s = (str(task.model[name]) for name in self._pop.results.dtype.names)
            self._outfile.write(" ".join(s))
            self._outfile.write("\n")
            self._outfile.flush()

        self._project.files.update_model_rfac(task.id.model, rfac)
        self._project.files.set_model_complete(task.id.model, True)

        if task.result_valid:
            if self._pop.is_converged() and not self._converged:
                logger.info("population converged")
                self._converged = True

            if task.time > self._model_time:
                self._model_time = task.time
        else:
            if self._invalid_count >= self._invalid_limit:
                logger.error("number of invalid calculations (%u) exceeds limit", self._invalid_count)
                self._converged = True

        # optimization complete?
        if (self._timeout or self._converged) and len(self._pending_tasks) == 0:
            del self._parent_tasks[parent_task.id]
        else:
            parent_task = None

        self.cleanup_files(keep=self._pop_size)
        return parent_task