910 lines
35 KiB
Python
910 lines
35 KiB
Python
"""
|
|
@package pmsco.swarm
|
|
particle swarm optimization handler.
|
|
|
|
the module starts multiple MSC calculations and optimizes the model parameters
|
|
according to the particle swarm optimization algorithm.
|
|
|
|
Particle swarm optimization adapted from
|
|
D. A. Duncan et al., Surface Science 606, 278 (2012)
|
|
|
|
@author Matthias Muntwiler, matthias.muntwiler@psi.ch
|
|
|
|
@copyright (c) 2015 by Paul Scherrer Institut @n
|
|
Licensed under the Apache License, Version 2.0 (the "License"); @n
|
|
you may not use this file except in compliance with the License.
|
|
You may obtain a copy of the License at
|
|
http://www.apache.org/licenses/LICENSE-2.0
|
|
"""
|
|
|
|
from __future__ import division
|
|
import copy
|
|
import os
|
|
import datetime
|
|
import logging
|
|
import numpy as np
|
|
import handlers
|
|
from helpers import BraceMessage as BMsg
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
CONSTRAIN_MODES = {'re-enter', 'bounce', 'scatter', 'stick', 'expand'}
|
|
|
|
|
|
class Population(object):
|
|
"""
|
|
particle swarm population.
|
|
"""
|
|
|
|
## @var size_req
|
|
# requested number of particles.
|
|
# read-only. call setup() to change this attribute.
|
|
|
|
## @var model_start
|
|
# (dict) initial model parameters.
|
|
# read-only. call setup() to change this attribute.
|
|
|
|
## @var model_min
|
|
# (dict) low limits of the model parameters.
|
|
# read-only. call setup() to change this attribute.
|
|
|
|
## @var model_max
|
|
# (dict) high limits of the model parameters.
|
|
# if min == max, the parameter is kept constant.
|
|
# read-only. call setup() to change this attribute.
|
|
|
|
## @var model_max
|
|
# (dict) high limits of the model parameters.
|
|
# read-only. call setup() to change this attribute.
|
|
|
|
## @var model_step
|
|
# (dict) initial velocity (difference between two steps) of the particle.
|
|
# read-only. call setup() to change this attribute.
|
|
|
|
## @var friends
|
|
# number of other particles that each particle consults for the global best fit.
|
|
# default = 3.
|
|
|
|
## @var momentum
|
|
# momentum of the particle.
|
|
# default = 0.689343.
|
|
|
|
## @var attract_local
|
|
# preference for returning to the local best fit
|
|
# default = 1.92694.
|
|
|
|
## @var attract_global
|
|
# preference for heading towards the global best fit.
|
|
# default = 1.92694
|
|
|
|
## @var generation
|
|
# generation number. the counter is incremented by advance_population().
|
|
# initial value = 0.
|
|
|
|
## @var model_count
|
|
# model number.
|
|
# the counter is incremented by advance_particle() each time a particle position is changed.
|
|
# initial value = 0.
|
|
|
|
## @var pos
|
|
# (numpy.ndarray) current positions of each particle.
|
|
#
|
|
# the column names include the names of the model parameters, taken from domain.start,
|
|
# and the special names @c '_particle', @c '_model', @c '_rfac'.
|
|
# the special fields have the following meanings:
|
|
#
|
|
# * @c '_particle': index of the particle in the array.
|
|
# the particle index is used to match a calculation result and its original particle.
|
|
# it must be preserved during the calculation process.
|
|
#
|
|
# * @c '_gen': generation number.
|
|
# the generation number counts the number of calls to advance_population().
|
|
# this field is not used internally.
|
|
# the first population is generation 0.
|
|
#
|
|
# * @c '_model': model number.
|
|
# the model number counts the number of calls to advance_particle().
|
|
# the field is filled with the current value of model_count whenever the position is changed.
|
|
# this field is not used internally.
|
|
# the model handlers use it to derive their model ID.
|
|
#
|
|
# * @c '_rfac': calculated R-factor for this position.
|
|
# this field is meaningful in the best and results arrays only
|
|
# where it is set by the add_result() method.
|
|
# in the pos and vel arrays, the field value is arbitrary.
|
|
#
|
|
# @note if your read a single element, e.g. pos[0], from the array, you will get a numpy.void object.
|
|
# this object is a <em>view</em> of the original array item
|
|
|
|
## @var vel
|
|
# (numpy.ndarray) current the velocities of each particle.
|
|
# the structure is the same as for the pos array.
|
|
|
|
## @var best
|
|
# (numpy.ndarray) best positions found by each particle so far.
|
|
# the structure is the same as for the pos array.
|
|
|
|
## @var results
|
|
# (numpy.ndarray) all positions and resulting R-factors calculated.
|
|
# the structure is the same as for the pos array.
|
|
|
|
## @var _hold_once
|
|
# (bool) hold the population once during the next update.
|
|
# if _hold_once is True, advance_population() will skip the update process once.
|
|
# this flag is set by setup() because it sets up a valid initial population.
|
|
# the caller then doesn't have to care whether to skip advance_population() after setup.
|
|
|
|
def __init__(self):
|
|
"""
|
|
initialize the population object.
|
|
|
|
"""
|
|
self.size_req = 0
|
|
self.model_start = {}
|
|
self.model_min = {}
|
|
self.model_max = {}
|
|
self.model_step = {}
|
|
|
|
self.friends = 3
|
|
self.momentum = 0.689343
|
|
self.attract_local = 1.92694
|
|
self.attract_global = 1.92694
|
|
self.position_constrain_mode = 'default'
|
|
self.velocity_constrain_mode = 'default'
|
|
|
|
self.generation = 0
|
|
self.model_count = 0
|
|
self._hold_once = False
|
|
|
|
self.pos = None
|
|
self.vel = None
|
|
self.best = None
|
|
self.results = None
|
|
|
|
def pos_gen(self):
|
|
"""
|
|
generator for dictionaries of the pos array.
|
|
|
|
the generator can be used to loop over the array.
|
|
on each iteration, it yields a dictionary of the position at the current index.
|
|
for example,
|
|
@code{.py}
|
|
for pos in pop.pos_gen():
|
|
print pos['_index'], pos['_rfac']
|
|
@endcode
|
|
"""
|
|
return ({name: pos[name] for name in pos.dtype.names} for pos in self.pos)
|
|
|
|
def vel_gen(self):
|
|
"""
|
|
generator for dictionaries of the vel array.
|
|
|
|
@see pos_gen() for details.
|
|
"""
|
|
return ({name: vel[name] for name in vel.dtype.names} for vel in self.vel)
|
|
|
|
def best_gen(self):
|
|
"""
|
|
generator for dictionaries of the best array.
|
|
|
|
@see pos_gen() for details.
|
|
"""
|
|
return ({name: best[name] for name in best.dtype.names} for best in self.best)
|
|
|
|
def results_gen(self):
|
|
"""
|
|
generator for dictionaries of the results array.
|
|
|
|
@see pos_gen() for details.
|
|
"""
|
|
return ({name: results[name] for name in results.dtype.names} for results in self.results)
|
|
|
|
@staticmethod
|
|
def get_model_dtype(model_params):
|
|
"""
|
|
get numpy array data type for model parameters and swarm control variables.
|
|
|
|
@param model_params: dictionary of model parameters or list of parameter names.
|
|
|
|
@return: dtype for use with numpy array constructors.
|
|
this is a sorted list of (name, type) tuples.
|
|
"""
|
|
dt = []
|
|
for key in model_params:
|
|
dt.append((key, 'f4'))
|
|
dt.append(('_particle', 'i4'))
|
|
dt.append(('_gen', 'i4'))
|
|
dt.append(('_model', 'i4'))
|
|
dt.append(('_rfac', 'f4'))
|
|
dt.sort(key=lambda t: t[0].lower())
|
|
return dt
|
|
|
|
def setup(self, size, domain, history_file="", recalc_history=True):
|
|
"""
|
|
set up the population arrays seeded with previous results and the start model.
|
|
|
|
* set the population parameters and allocate the data arrays.
|
|
* set one particle to the initial guess, and the others to positions from a previous results file.
|
|
if the file contains less particles than allocated, the remaining particles are initialized randomly.
|
|
|
|
seeding from a history file can be used to continue an interrupted optimization process.
|
|
the method loads the results into the best and position arrays,
|
|
and updates the other arrays and variables
|
|
so that the population can be advanced and calculated.
|
|
|
|
by default, the calculations of the previous parameters are repeated.
|
|
this is recommended whenever the code, the experimental input, or the project arguments change
|
|
because all of them may have an influence on the R-factor.
|
|
|
|
re-calculation can be turned off by setting recalc_history to false.
|
|
this is recommended only if the calculation is a direct continuation of a previous one
|
|
without any changes to the code or input.
|
|
in that case, the previous results are marked as generation -1 with a negative model number.
|
|
upon the first iteration before running the scattering calculations,
|
|
new parameters will be derived by the swarm algorithm.
|
|
|
|
@param size: requested number of particles.
|
|
|
|
@param domain: definition of initial and limiting model parameters
|
|
expected by the cluster and parameters functions.
|
|
|
|
@arg domain.start: initial guess.
|
|
@arg domain.min: minimum values allowed.
|
|
@arg domain.max: maximum values allowed. if min == max, the parameter is kept constant.
|
|
@arg domain.step: initial velocity (difference between two steps) for particle swarm.
|
|
|
|
@param history_file: name of the results history file.
|
|
this can be a file created by the @ref save_array or @ref save_results methods.
|
|
the columns of the plain-text file contain model parameters and
|
|
the _rfac values of a previous calculation.
|
|
additional columns are ignored.
|
|
the first row must contain the column names.
|
|
if a parameter column is missing,
|
|
the corresponding parameter is seeded with a random value within the domain.
|
|
in this case, a warning is added to the log file.
|
|
|
|
the number of rows does not need to be equal to the population size.
|
|
if it is lower, the remaining particles are initialized randomly.
|
|
if it is higher, only the ones with the lowest R factors are used.
|
|
results with R >= 1.0 are ignored in any case.
|
|
|
|
@param recalc_history: select whether the R-factors of the historic models are calculated again.
|
|
this is useful if the historic data was calculated for a different cluster, different set of parameters,
|
|
or different experimental data, and if the R-factors of the new optimization may be systematically greater.
|
|
set this argument to False only if the calculation is a continuation of a previous one
|
|
without any changes to the code.
|
|
|
|
@return: None
|
|
"""
|
|
self.size_req = size
|
|
self.model_start = domain.start
|
|
self.model_min = domain.min
|
|
self.model_max = domain.max
|
|
self.model_step = domain.step
|
|
|
|
# allocate arrays
|
|
dt = self.get_model_dtype(self.model_start)
|
|
self.pos = np.zeros(self.size_req, dtype=dt)
|
|
self.vel = np.zeros(self.size_req, dtype=dt)
|
|
self.results = np.empty((0), dtype=dt)
|
|
|
|
# randomize population
|
|
self.generation = 0
|
|
self.randomize()
|
|
self.pos['_particle'] = np.arange(self.size_req)
|
|
self.pos['_gen'] = self.generation
|
|
self.pos['_model'] = np.arange(self.size_req)
|
|
self.pos['_rfac'] = 2.1
|
|
self.model_count = self.size_req
|
|
|
|
# add previous results
|
|
if history_file:
|
|
hist = np.genfromtxt(history_file, names=True)
|
|
hist = hist[hist['_rfac'] < 1.0]
|
|
hist.sort(order='_rfac')
|
|
hist_size = min(hist.shape[0], self.size_req - 1)
|
|
|
|
discarded_fields = {'_particle', '_gen', '_model'}
|
|
source_fields = set(hist.dtype.names) - discarded_fields
|
|
dest_fields = set(self.pos.dtype.names) - discarded_fields
|
|
common_fields = source_fields & dest_fields
|
|
if len(common_fields) < len(dest_fields):
|
|
logger.warning(BMsg("missing columns in history file {hf} default to random seed value.",
|
|
hf=history_file))
|
|
for name in common_fields:
|
|
self.pos[name][0:hist_size] = hist[name][0:hist_size]
|
|
|
|
self.pos['_particle'] = np.arange(self.size_req)
|
|
logger.info(BMsg("seeding swarm population with {hs} models from history file {hf}.",
|
|
hs=hist_size, hf=history_file))
|
|
if recalc_history:
|
|
self.pos['_gen'] = self.generation
|
|
self.pos['_model'] = np.arange(self.size_req)
|
|
self.pos['_rfac'] = 2.1
|
|
logger.info("historic models will be re-calculated.")
|
|
else:
|
|
self.pos['_gen'][0:hist_size] = -1
|
|
self.pos['_model'][0:hist_size] = -np.arange(hist_size) - 1
|
|
self.model_count = self.size_req - hist_size
|
|
self.pos['_model'][hist_size:] = np.arange(self.model_count)
|
|
logger.info("historic models will not be re-calculated.")
|
|
|
|
# seed last particle with start parameters
|
|
self.seed(self.model_start, index=-1)
|
|
|
|
# initialize best array
|
|
self.best = self.pos.copy()
|
|
|
|
self._hold_once = True
|
|
|
|
def randomize(self, pos=True, vel=True):
|
|
"""
|
|
initializes a random population.
|
|
|
|
the position array is filled with random values (uniform distribution) from the parameter domain.
|
|
velocity values are randomly chosen between -1/8 to 1/8 times the width (max - min) of the parameter domain.
|
|
|
|
the method does not update the particle info fields.
|
|
|
|
@param pos: randomize positions. if False, the positions are not changed.
|
|
@param vel: randomize velocities. if False, the velocities are not changed.
|
|
"""
|
|
if pos:
|
|
for key in self.model_start:
|
|
self.pos[key] = ((self.model_max[key] - self.model_min[key]) *
|
|
np.random.random_sample(self.pos.shape) + self.model_min[key])
|
|
if vel:
|
|
for key in self.model_start:
|
|
self.vel[key] = ((self.model_max[key] - self.model_min[key]) *
|
|
(np.random.random_sample(self.pos.shape) - 0.5) / 4.0)
|
|
|
|
def seed(self, params, index=0):
|
|
"""
|
|
set the one of the particles to the specified seed values.
|
|
|
|
the method does not update the particle info fields.
|
|
|
|
@param params: dictionary of model parameters.
|
|
the keys must match the ones of domain.start.
|
|
|
|
@param index: index of the particle that is seeded.
|
|
the index must be in the allowed range of the self.pos array.
|
|
0 is the first, -1 the last particle.
|
|
"""
|
|
for key in params:
|
|
self.pos[key][index] = params[key]
|
|
|
|
def update_particle_info(self, index, inc_model=True):
|
|
"""
|
|
set the internal particle info fields.
|
|
|
|
the fields @c _particle, @c _gen, and @c _model are updated with the current values.
|
|
@c _rfac is set to the default value 2.1.
|
|
|
|
this method must be called after each change of particle position.
|
|
|
|
@param index: (int) particle index.
|
|
|
|
@param inc_model: (bool) if True, increment the model count afterwards.
|
|
|
|
@return: None
|
|
"""
|
|
self.pos['_particle'][index] = index
|
|
self.pos['_gen'][index] = self.generation
|
|
self.pos['_model'][index] = self.model_count
|
|
self.pos['_rfac'][index] = 2.1
|
|
|
|
if inc_model:
|
|
self.model_count += 1
|
|
|
|
def advance_population(self):
|
|
"""
|
|
advance the population by one step.
|
|
|
|
this method just calls advance_particle() for each particle of the population.
|
|
if generation is lower than zero, the method increases the generation number but does not advance the particles.
|
|
|
|
@return: None
|
|
"""
|
|
if not self._hold_once:
|
|
self.generation += 1
|
|
for index, __ in enumerate(self.pos):
|
|
self.advance_particle(index)
|
|
self._hold_once = False
|
|
|
|
def advance_particle(self, index):
|
|
"""
|
|
advance a particle by one step.
|
|
|
|
@param index: index of the particle in the population.
|
|
"""
|
|
|
|
# note: the following two identifiers are views,
|
|
# assignment will modify the original array
|
|
pos = self.pos[index]
|
|
vel = self.vel[index]
|
|
# best fit that this individual has seen
|
|
xl = self.best[index]
|
|
# best fit that a group of others have seen
|
|
xg = self.best_friend(index)
|
|
|
|
for key in self.model_start:
|
|
# update velocity
|
|
dxl = xl[key] - pos[key]
|
|
dxg = xg[key] - pos[key]
|
|
pv = np.random.random()
|
|
pl = np.random.random()
|
|
pg = np.random.random()
|
|
vel[key] = (self.momentum * pv * vel[key] +
|
|
self.attract_local * pl * dxl +
|
|
self.attract_global * pg * dxg)
|
|
pos[key], vel[key], self.model_min[key], self.model_max[key] = \
|
|
self.constrain_velocity(pos[key], vel[key], self.model_min[key], self.model_max[key],
|
|
self.velocity_constrain_mode)
|
|
# update position
|
|
pos[key] += vel[key]
|
|
pos[key], vel[key], self.model_min[key], self.model_max[key] = \
|
|
self.constrain_position(pos[key], vel[key], self.model_min[key], self.model_max[key],
|
|
self.position_constrain_mode)
|
|
|
|
self.update_particle_info(index)
|
|
|
|
@staticmethod
|
|
def constrain_velocity(_pos, _vel, _min, _max, _mode='default'):
|
|
"""
|
|
constrain a velocity to the given bounds.
|
|
|
|
@param _pos: current position of the particle.
|
|
|
|
@param _vel: new velocity of the particle, i.e. distance to move.
|
|
|
|
@param _min: lower position boundary.
|
|
|
|
@param _max: upper position boundary.
|
|
_max must be greater or equal to _min.
|
|
|
|
@param _mode: what to do if a boundary constraint is violated.
|
|
reserved for future use. should be set to 'default'.
|
|
|
|
@return: tuple (new position, new velocity, new lower boundary, new upper boundary).
|
|
in the current implementation only the velocity may change.
|
|
however, in future versions any of these values may change.
|
|
"""
|
|
d = abs(_max - _min) / 2.0
|
|
if d > 0.0:
|
|
while abs(_vel) >= d:
|
|
_vel /= 2.0
|
|
else:
|
|
_vel = 0.0
|
|
return _pos, _vel, _min, _max
|
|
|
|
@staticmethod
|
|
def constrain_position(_pos, _vel, _min, _max, _mode='default'):
|
|
"""
|
|
constrain a position to the given bounds.
|
|
|
|
@param _pos: new position of the particle, possible out of bounds.
|
|
|
|
@param _vel: velocity of the particle, i.e. distance from the previous position.
|
|
_vel must be lower than _max - _min.
|
|
|
|
@param _min: lower boundary.
|
|
|
|
@param _max: upper boundary.
|
|
_max must be greater or equal to _min.
|
|
|
|
@param _mode: what to do if a boundary constraint is violated:
|
|
@arg 're-enter': re-enter from the opposite side of the parameter interval.
|
|
@arg 'bounce': fold the motion vector at the boundary and move the particle back into the domain.
|
|
@arg 'scatter': place the particle at a random place between its old position and the violated boundary.
|
|
@arg 'stick': place the particle at the violated boundary.
|
|
@arg 'expand': move the boundary so that the particle fits.
|
|
@arg 'random': place the particle at a random position between the lower and upper boundaries.
|
|
@arg 'default': the default mode is 'bounce'. this may change in future versions.
|
|
|
|
@return: tuple (new position, new velocity, new lower boundary, new upper boundary).
|
|
depending on the mode, any of these values may change.
|
|
the velocity is adjusted to be consistent with the change of position.
|
|
"""
|
|
_rng = max(_max - _min, 0.0)
|
|
_old = _pos - _vel
|
|
|
|
# prevent undershoot
|
|
if _vel > 0.0 and _pos < _min:
|
|
_pos = _min
|
|
_vel = _pos - _old
|
|
if _vel < 0.0 and _pos > _max:
|
|
_pos = _max
|
|
_vel = _pos - _old
|
|
|
|
assert abs(_vel) <= _rng, \
|
|
"velocity: pos = {0}, min = {1}, max = {2}, vel = {3}, _rng = {4}".format(_pos, _min, _max, _vel, _rng)
|
|
assert (_vel >= 0 and _pos >= _min) or (_vel <= 0 and _pos <= _max), \
|
|
"undershoot: pos = {0}, min = {1}, max = {2}, vel = {3}, _rng = {4}".format(_pos, _min, _max, _vel, _rng)
|
|
|
|
if _rng > 0.0:
|
|
while _pos > _max:
|
|
if _mode == 're-enter':
|
|
_pos -= _rng
|
|
elif _mode == 'bounce' or _mode == 'default':
|
|
_pos = _max - (_pos - _max)
|
|
_vel = -_vel
|
|
elif _mode == 'scatter':
|
|
_pos = _old + (_max - _old) * np.random.random()
|
|
_vel = _pos - _old
|
|
elif _mode == 'stick':
|
|
_pos = _max
|
|
_vel = _pos - _old
|
|
elif _mode == 'expand':
|
|
_max = _pos
|
|
elif _mode == 'random':
|
|
_pos = _min + _rng * np.random.random()
|
|
_vel = _pos - _old
|
|
else:
|
|
raise ValueError('invalid constrain mode')
|
|
|
|
while _pos < _min:
|
|
if _mode == 're-enter':
|
|
_pos += _rng
|
|
elif _mode == 'bounce' or _mode == 'default':
|
|
_pos = _min - (_pos - _min)
|
|
_vel = -_vel
|
|
elif _mode == 'scatter':
|
|
_pos = _old + (_min - _old) * np.random.random()
|
|
_vel = _pos - _old
|
|
elif _mode == 'stick':
|
|
_pos = _min
|
|
_vel = _pos - _old
|
|
elif _mode == 'expand':
|
|
_min = _pos
|
|
elif _mode == 'random':
|
|
_pos = _min + _rng * np.random.random()
|
|
_vel = _pos - _old
|
|
else:
|
|
raise ValueError('invalid constrain mode')
|
|
else:
|
|
_pos = _max
|
|
_vel = 0.0
|
|
|
|
return _pos, _vel, _min, _max
|
|
|
|
# noinspection PyUnusedLocal
|
|
def best_friend(self, index):
|
|
"""
|
|
select the best fit out of a random set of particles
|
|
|
|
returns the "best friend"
|
|
"""
|
|
friends = np.random.choice(self.best, self.friends, replace=False)
|
|
index = np.argmin(friends['_rfac'])
|
|
return friends[index]
|
|
|
|
def add_result(self, particle, rfac):
|
|
"""
|
|
add a calculation particle to the results array, and update the best fit array.
|
|
|
|
@param particle: dictionary of model parameters and particle values.
|
|
the keys must correspond to the columns of the pos array,
|
|
i.e. the names of the model parameters plus the _rfac, _particle, and _model fields.
|
|
|
|
@param rfac: calculated R-factor.
|
|
the R-factor is written to the '_rfac' field.
|
|
|
|
@return better (bool): True if the new R-factor is better than the particle's previous best mark.
|
|
"""
|
|
particle['_rfac'] = rfac
|
|
l = [particle[n] for n in self.results.dtype.names]
|
|
t = tuple(l)
|
|
a = np.asarray(t, dtype=self.results.dtype)
|
|
self.results = np.append(self.results, a)
|
|
index = particle['_particle']
|
|
better = particle['_rfac'] < self.best['_rfac'][index]
|
|
if better:
|
|
self.best[index] = a
|
|
|
|
return better
|
|
|
|
def is_converged(self, tol=0.01):
|
|
"""
|
|
check whether the population has converged.
|
|
|
|
convergence is reached when the R-factors of the N latest results,
|
|
do not vary more than tol, where N is the size of the population.
|
|
|
|
@param tol: max. difference allowed between greatest and lowest value of the R factor in the population.
|
|
"""
|
|
nres = self.results.shape[0]
|
|
npop = self.pos.shape[0]
|
|
if nres >= npop:
|
|
rfac1 = np.min(self.results['_rfac'][-npop:])
|
|
rfac2 = np.max(self.results['_rfac'][-npop:])
|
|
converg = rfac2 - rfac1 < tol
|
|
return converg
|
|
else:
|
|
return False
|
|
|
|
def save_array(self, filename, array):
|
|
"""
|
|
save a population array to a text file.
|
|
|
|
the columns are space-delimited.
|
|
the first line contains the column names.
|
|
|
|
@param filename: name of destination file, optionally including a path.
|
|
|
|
@param array: population array to save.
|
|
must be one of self.pos, self.vel, self.best, self.results
|
|
"""
|
|
header = " ".join(self.results.dtype.names)
|
|
np.savetxt(filename, array, fmt='%g', header=header)
|
|
|
|
def load_array(self, filename, array):
|
|
"""
|
|
load a population array from a text file.
|
|
|
|
the array to load must be compatible with the current population
|
|
(same number of rows, same columns).
|
|
the first row must contain column names.
|
|
the ordering of columns may be different.
|
|
the returned array is ordered according to the array argument.
|
|
|
|
@param filename: name of source file, optionally including a path.
|
|
|
|
@param array: population array to load.
|
|
must be one of self.pos, self.vel, self.results.
|
|
|
|
@return array with loaded data.
|
|
this may be the same instance as on input.
|
|
|
|
@raise AssertionError if the number of rows of the two files differ.
|
|
"""
|
|
data = np.genfromtxt(filename, names=True)
|
|
assert data.shape == array.shape
|
|
for name in data.dtype.names:
|
|
array[name] = data[name]
|
|
return array
|
|
|
|
def save_population(self, base_filename):
|
|
"""
|
|
save the population array to a set of text files.
|
|
|
|
the file name extensions are .pos, .vel, and .best
|
|
"""
|
|
self.save_array(base_filename + ".pos", self.pos)
|
|
self.save_array(base_filename + ".vel", self.vel)
|
|
self.save_array(base_filename + ".best", self.best)
|
|
|
|
def load_population(self, base_filename):
|
|
"""
|
|
load the population array from a set of previously saved text files.
|
|
this can be used to continue an optimization job.
|
|
|
|
the file name extensions are .pos, .vel, and .best.
|
|
the files must have the same format as produced by save_population.
|
|
the files must have the same number of rows.
|
|
"""
|
|
self.pos = self.load_array(base_filename + ".pos", self.pos)
|
|
self.vel = self.load_array(base_filename + ".vel", self.vel)
|
|
self.best = self.load_array(base_filename + ".best", self.best)
|
|
|
|
def save_results(self, filename):
|
|
"""
|
|
saves the complete list of calculations results.
|
|
"""
|
|
self.save_array(filename, self.results)
|
|
|
|
|
|
class ParticleSwarmHandler(handlers.ModelHandler):
|
|
"""
|
|
model handler which implements the particle swarm optimization algorithm.
|
|
|
|
"""
|
|
|
|
## @var _pop (Population)
|
|
# holds the population object.
|
|
|
|
## @var _pop_size (int)
|
|
# number of particles in the swarm.
|
|
|
|
## @var _outfile (file)
|
|
# output file for model parametes and R factor.
|
|
# the file is open during calculations.
|
|
# each calculation result adds one line.
|
|
|
|
## @var _model_time (timedelta)
|
|
# estimated CPU time to calculate one model.
|
|
# this value is the maximum time measured of the completed calculations.
|
|
# it is used to determine when the optimization should be finished so that the time limit is not exceeded.
|
|
|
|
## @var _converged (bool)
|
|
# indicates that the population has converged.
|
|
# convergence is detected by calling Population.is_converged().
|
|
# once convergence has been reached, this flag is set, and further convergence tests are skipped.
|
|
|
|
## @var _timeout (bool)
|
|
# indicates when the handler has run out of time,
|
|
# i.e. time is up before convergence has been reached.
|
|
# if _timeout is True, create_tasks() will not create further tasks,
|
|
# and add_result() will signal completion when the _pending_tasks queue becomes empty.
|
|
|
|
## @var _invalid_limit (int)
|
|
# maximum tolerated number of invalid calculations.
|
|
#
|
|
# if the number of invalid calculations (self._invalid_count) exceeds this limit,
|
|
# the optimization is aborted.
|
|
# the variable is initialized by self.setup() to 10 times the population size.
|
|
|
|
def __init__(self):
|
|
super(ParticleSwarmHandler, self).__init__()
|
|
self._pop = None
|
|
self._pop_size = 0
|
|
self._outfile = None
|
|
self._model_time = datetime.timedelta()
|
|
self._converged = False
|
|
self._timeout = False
|
|
self._invalid_limit = 10
|
|
|
|
def setup(self, project, slots):
|
|
"""
|
|
initialize the particle swarm and open an output file.
|
|
|
|
the population size is set to project.pop_size if it is defined and greater than 4.
|
|
otherwise, it defaults to <code>max(2 * slots, 4)</code>.
|
|
|
|
for good efficiency the population size (number of particles) should be
|
|
greater or equal to the number of available processing slots,
|
|
otherwise the next generation is created before all particles have been calculated
|
|
which may slow down convergence.
|
|
|
|
if calculations take a long time compared to the available computation time
|
|
or spawn a lot of sub-tasks due to complex symmetry,
|
|
and you prefer to allow for a good number of generations,
|
|
you should override the population size.
|
|
|
|
@param project: project instance.
|
|
|
|
@param slots: number of calculation processes available through MPI.
|
|
|
|
@return: None
|
|
"""
|
|
super(ParticleSwarmHandler, self).setup(project, slots)
|
|
|
|
_min_size = 4
|
|
if project.pop_size:
|
|
self._pop_size = max(project.pop_size, _min_size)
|
|
else:
|
|
self._pop_size = max(self._slots * 2, _min_size)
|
|
self._pop = Population()
|
|
self._pop.setup(self._pop_size, self._project.create_domain(), self._project.history_file,
|
|
self._project.recalc_history)
|
|
self._invalid_limit = self._pop_size * 10
|
|
|
|
self._outfile = open(self._project.output_file + ".dat", "w")
|
|
self._outfile.write("# ")
|
|
self._outfile.write(" ".join(self._pop.results.dtype.names))
|
|
self._outfile.write("\n")
|
|
|
|
return None
|
|
|
|
def cleanup(self):
|
|
self._outfile.close()
|
|
super(ParticleSwarmHandler, self).cleanup()
|
|
|
|
def create_tasks(self, parent_task):
|
|
"""
|
|
develop the particle population and create a calculation task per particle.
|
|
|
|
this method advances the population by one step.
|
|
it generates one task for each particle if its model number is positive.
|
|
negative model numbers indicate that the particle is used for seeding
|
|
and does not need to be calculated in the first generation.
|
|
|
|
if the time limit is approaching, no new tasks are created.
|
|
|
|
the process loop calls this method every time the length of the task queue drops
|
|
below the number of calculation processes (slots).
|
|
this means in particular that a population will not be completely calculated
|
|
before the next generation starts.
|
|
for efficiency reasons, we do not wait until a population is complete.
|
|
this will cause a certain mixing of generations and slow down convergence
|
|
because the best peer position in the generation may not be known yet.
|
|
the effect can be reduced by making the population larger than the number of processes.
|
|
|
|
@return list of generated tasks. empty list if the optimization has converged (see Population.is_converged()).
|
|
"""
|
|
|
|
super(ParticleSwarmHandler, self).create_tasks(parent_task)
|
|
|
|
# this is the top-level handler, we expect just one parent: root.
|
|
parent_id = parent_task.id
|
|
assert parent_id == (-1, -1, -1, -1, -1)
|
|
self._parent_tasks[parent_id] = parent_task
|
|
|
|
time_pending = self._model_time * len(self._pending_tasks)
|
|
time_avail = (self.datetime_limit - datetime.datetime.now()) * max(self._slots, 1)
|
|
|
|
out_tasks = []
|
|
if not self._timeout and not self._converged:
|
|
self._pop.advance_population()
|
|
|
|
for pos in self._pop.pos_gen():
|
|
time_pending += self._model_time
|
|
if time_pending > time_avail:
|
|
self._timeout = True
|
|
logger.info("time limit reached")
|
|
break
|
|
|
|
if pos['_model'] >= 0:
|
|
new_task = parent_task.copy()
|
|
new_task.parent_id = parent_id
|
|
new_task.model = pos
|
|
new_task.change_id(model=pos['_model'])
|
|
|
|
child_id = new_task.id
|
|
self._pending_tasks[child_id] = new_task
|
|
out_tasks.append(new_task)
|
|
|
|
return out_tasks
|
|
|
|
def add_result(self, task):
|
|
"""
|
|
calculate the R factor of the result and add it to the results list of the population.
|
|
|
|
* save the current population.
|
|
* append the result to the result output file.
|
|
* update the execution time statistics.
|
|
* remove temporary files if requested.
|
|
* check whether the population has converged.
|
|
|
|
@return parent task (CalculationTask) if the optimization has converged, @c None otherwise.
|
|
"""
|
|
super(ParticleSwarmHandler, self).add_result(task)
|
|
|
|
self._complete_tasks[task.id] = task
|
|
del self._pending_tasks[task.id]
|
|
parent_task = self._parent_tasks[task.parent_id]
|
|
|
|
rfac = 1.0
|
|
if task.result_valid:
|
|
try:
|
|
rfac = self._project.calc_rfactor(task)
|
|
except ValueError:
|
|
task.result_valid = False
|
|
self._invalid_count += 1
|
|
logger.warning(BMsg("calculation of model {0} resulted in an undefined R-factor.", task.id.model))
|
|
|
|
task.model['_rfac'] = rfac
|
|
self._pop.add_result(task.model, rfac)
|
|
self._pop.save_population(self._project.output_file + ".pop")
|
|
|
|
if self._outfile:
|
|
s = (str(task.model[name]) for name in self._pop.results.dtype.names)
|
|
self._outfile.write(" ".join(s))
|
|
self._outfile.write("\n")
|
|
self._outfile.flush()
|
|
|
|
self._project.files.update_model_rfac(task.id.model, rfac)
|
|
self._project.files.set_model_complete(task.id.model, True)
|
|
|
|
if task.result_valid:
|
|
if self._pop.is_converged() and not self._converged:
|
|
logger.info("population converged")
|
|
self._converged = True
|
|
|
|
if task.time > self._model_time:
|
|
self._model_time = task.time
|
|
else:
|
|
if self._invalid_count >= self._invalid_limit:
|
|
logger.error("number of invalid calculations (%u) exceeds limit", self._invalid_count)
|
|
self._converged = True
|
|
|
|
# optimization complete?
|
|
if (self._timeout or self._converged) and len(self._pending_tasks) == 0:
|
|
del self._parent_tasks[parent_task.id]
|
|
else:
|
|
parent_task = None
|
|
|
|
self.cleanup_files(keep=self._pop_size)
|
|
return parent_task
|