1370 lines
55 KiB
Python
1370 lines
55 KiB
Python
"""
|
|
@package pmsco.optimizers.population
|
|
base classes for population-based optimizers.
|
|
|
|
a _population_ is a set of individuals or particles
|
|
that can assume coordinates from the parameter domain.
|
|
a tuple of coordinates is also called _model parameters_ which define the _model_.
|
|
the individuals travel through parameter space according to an algorithm defined separately.
|
|
depending on the algorithm, the population can converge towards the optimum coordinates based on calculated R-factors.
|
|
the population evolves in steps where each iteration is a new _generation_.
|
|
|
|
this module defines the base classes for population-based optimizers.
|
|
the evolutionary algorithms are implemented in other modules.
|
|
|
|
@author Matthias Muntwiler, matthias.muntwiler@psi.ch
|
|
|
|
@copyright (c) 2015-18 by Paul Scherrer Institut @n
|
|
Licensed under the Apache License, Version 2.0 (the "License"); @n
|
|
you may not use this file except in compliance with the License.
|
|
You may obtain a copy of the License at
|
|
http://www.apache.org/licenses/LICENSE-2.0
|
|
"""
|
|
|
|
from __future__ import absolute_import
|
|
from __future__ import division
|
|
from __future__ import print_function
|
|
|
|
import datetime
|
|
import logging
|
|
import math
|
|
import numpy as np
|
|
import os
|
|
import time
|
|
|
|
from pmsco.compat import open
|
|
import pmsco.handlers as handlers
|
|
import pmsco.graphics.rfactor as grfactor
|
|
from pmsco.helpers import BraceMessage as BMsg
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
CONSTRAIN_MODES = {'default', 're-enter', 'bounce', 'scatter', 'stick', 'expand', 'random', 'error'}
|
|
|
|
|
|
class Population(object):
|
|
"""
|
|
basic population class.
|
|
|
|
this class maintains the data structures of a particle population
|
|
and provides generic facilities to manipulate the population.
|
|
|
|
the general processing steps are:
|
|
1. create the object instance of a sub-class such as swarm.SwarmPopulation.
|
|
2. set the general properties of the population and algorithm.
|
|
3. call @ref setup to initialize the population and its data structures.
|
|
4. repeatedly call @ref advance_population as necessary.
|
|
|
|
the population evolves mainly by the @ref advance_population method
|
|
which is implemented in sub-classes according to the specific optimization algorithm.
|
|
|
|
the base class also provides other facilities to manipulate the population:
|
|
- _randomize_ produces a completely random population.
|
|
this is usually done once during setup (generation 0).
|
|
- _seed_ injects results from a previous calculation into the first generation.
|
|
this is usually done once during setup (generation 0).
|
|
- _patch_ injects explicit particle values at any generation.
|
|
patches are typically created by the user to explore parts of the parameter space
|
|
that the algorithm has not covered, or to throw the population out of a local optimum.
|
|
- _import_ loads a generation from an explicit parameter table or file.
|
|
this can be used as an interface for external optimization algorithms.
|
|
|
|
sub-classes that implement a specific optimization algorithm should override
|
|
- @ref setup
|
|
- @ref advance_population
|
|
"""
|
|
|
|
## @var size_req
|
|
# requested number of particles.
|
|
# read-only. call setup() to change this attribute.
|
|
|
|
## @var size_act
|
|
# actual number of particles.
|
|
# this is the number of particles that @ref pos_gen will return.
|
|
# it may vary in the course of the optimization but is always lower or equal to @ref size_req.
|
|
|
|
## @var model_start
|
|
# (dict) initial model parameters.
|
|
# read-only. call setup() to change this attribute.
|
|
|
|
## @var model_min
|
|
# (dict) low limits of the model parameters.
|
|
# read-only. call setup() to change this attribute.
|
|
|
|
## @var model_max
|
|
# (dict) high limits of the model parameters.
|
|
# if min == max, the parameter is kept constant.
|
|
# read-only. call setup() to change this attribute.
|
|
|
|
## @var model_max
|
|
# (dict) high limits of the model parameters.
|
|
# read-only. call setup() to change this attribute.
|
|
|
|
## @var model_step
|
|
# (dict) initial velocity (difference between two steps) of the particle.
|
|
# read-only. call setup() to change this attribute.
|
|
|
|
## @var generation
|
|
# generation number. the counter is incremented by advance_population().
|
|
# initial value = 0.
|
|
|
|
## @var model_count
|
|
# model number.
|
|
# the counter is incremented by advance_particle() each time a particle position is changed.
|
|
# initial value = 0.
|
|
|
|
## @var pos
|
|
# (numpy.ndarray) current positions of each particle.
|
|
#
|
|
# the column names include the names of the model parameters, taken from domain.start,
|
|
# and the special names @c '_particle', @c '_model', @c '_rfac'.
|
|
# the special fields have the following meanings:
|
|
#
|
|
# * @c '_particle': index of the particle in the array.
|
|
# the particle index is used to match a calculation result and its original particle.
|
|
# it must be preserved during the calculation process.
|
|
#
|
|
# * @c '_gen': generation number.
|
|
# the generation number counts the number of calls to advance_population().
|
|
# this field is not used internally.
|
|
# the first population is generation 0.
|
|
# models with negative generation numbers are ignored by the model handler,
|
|
# negative numbers can occur with seed models that don't need to be re-calculated.
|
|
#
|
|
# * @c '_model': model number.
|
|
# the model number counts the number of calls to advance_particle().
|
|
# the field is filled with the current value of model_count whenever the position is changed.
|
|
# this field is not used internally.
|
|
# the model handlers use it to derive their model ID.
|
|
#
|
|
# * @c '_rfac': calculated R-factor for this position.
|
|
# this field is meaningful in the best and results arrays only
|
|
# where it is set by the add_result() method.
|
|
# in the pos and vel arrays, the field value is arbitrary.
|
|
# valid R-factors are between 0 and 2.
|
|
# the field is initialized with a value greater than 2 if the calculation hasn't been done yet.
|
|
#
|
|
# @note if your read a single element, e.g. pos[0], from the array, you will get a numpy.void object.
|
|
# this object is a <em>view</em> of the original array item
|
|
|
|
## @var pos_patch
|
|
# (numpy.ndarray) positions to patch a live population.
|
|
#
|
|
#
|
|
|
|
## @var vel
|
|
# (numpy.ndarray) current the velocities of each particle.
|
|
# the structure is the same as for the pos array.
|
|
|
|
## @var best
|
|
# (numpy.ndarray) best positions found by each particle so far.
|
|
# the structure is the same as for the pos array.
|
|
|
|
## @var results
|
|
# (numpy.ndarray) all positions and resulting R-factors calculated.
|
|
# the structure is the same as for the pos array.
|
|
|
|
## @var _hold_once
|
|
# (bool) hold the population once during the next update.
|
|
# if _hold_once is True, advance_population() will skip the update process once.
|
|
# this flag is set by setup() because it sets up a valid initial population.
|
|
# the caller then doesn't have to care whether to skip advance_population() after setup.
|
|
|
|
def __init__(self):
|
|
"""
|
|
initialize the population object.
|
|
|
|
"""
|
|
self.size_req = 0
|
|
self.size_act = 0
|
|
self.model_start = {}
|
|
self.model_min = {}
|
|
self.model_max = {}
|
|
self.model_step = {}
|
|
self.model_start_array = None
|
|
self.model_min_array = None
|
|
self.model_max_array = None
|
|
self.model_step_array = None
|
|
|
|
self.position_constrain_mode = 'default'
|
|
self.velocity_constrain_mode = 'default'
|
|
|
|
self.generation = 0
|
|
self.model_count = 0
|
|
self._hold_once = False
|
|
|
|
self.pos = None
|
|
self.vel = None
|
|
self.best = None
|
|
self.results = None
|
|
self.pos_import = None
|
|
self.pos_patch = None
|
|
|
|
def pos_gen(self):
|
|
"""
|
|
generator for dictionaries of the pos array.
|
|
|
|
the generator can be used to loop over the array.
|
|
on each iteration, it yields a dictionary of the position at the current index.
|
|
for example,
|
|
@code{.py}
|
|
for pos in pop.pos_gen():
|
|
print pos['_index'], pos['_rfac']
|
|
@endcode
|
|
"""
|
|
return ({name: pos[name] for name in pos.dtype.names}
|
|
for idx, pos in enumerate(self.pos) if idx < self.size_act)
|
|
|
|
def vel_gen(self):
|
|
"""
|
|
generator for dictionaries of the vel array.
|
|
|
|
@see pos_gen() for details.
|
|
"""
|
|
return ({name: vel[name] for name in vel.dtype.names}
|
|
for idx, vel in enumerate(self.vel) if idx < self.size_act)
|
|
|
|
def best_gen(self):
|
|
"""
|
|
generator for dictionaries of the best array.
|
|
|
|
@see pos_gen() for details.
|
|
"""
|
|
return ({name: best[name] for name in best.dtype.names} for best in self.best)
|
|
|
|
def results_gen(self):
|
|
"""
|
|
generator for dictionaries of the results array.
|
|
|
|
@see pos_gen() for details.
|
|
"""
|
|
return ({name: results[name] for name in results.dtype.names} for results in self.results)
|
|
|
|
@classmethod
|
|
def get_pop_dtype(cls, model_params):
|
|
"""
|
|
get numpy array data type for model parameters and population control variables.
|
|
|
|
@param model_params: dictionary of model parameters or list of parameter names.
|
|
|
|
@return: dtype for use with numpy array constructors.
|
|
this is a sorted list of (name, type) tuples.
|
|
"""
|
|
dt = []
|
|
for key in model_params:
|
|
if key[0] != '_':
|
|
dt.append((key, 'f8'))
|
|
dt.append(('_particle', 'i8'))
|
|
dt.append(('_gen', 'i8'))
|
|
dt.append(('_model', 'i8'))
|
|
dt.append(('_rfac', 'f8'))
|
|
return sorted(dt, key=lambda t: t[0].lower())
|
|
|
|
@classmethod
|
|
def get_model_dtype(cls, model_params):
|
|
"""
|
|
get numpy array data type for model parameters (without control variables).
|
|
|
|
@param model_params: dictionary of model parameters or list of parameter names.
|
|
|
|
@return: dtype for use with numpy array constructors.
|
|
this is a sorted list of (name, type) tuples.
|
|
"""
|
|
dt = []
|
|
for key in model_params:
|
|
if key[0] != '_':
|
|
dt.append((key, 'f8'))
|
|
return sorted(dt, key=lambda t: t[0].lower())
|
|
|
|
@classmethod
|
|
def get_model_array(cls, model_dict):
|
|
"""
|
|
convert a model dictionary to a model array.
|
|
|
|
the model array is a shape = 1 numpy structured array.
|
|
column names are the parameter names,
|
|
values of the single array item are the model parameters.
|
|
|
|
keys starting with an underscore (control parameters) are ignored.
|
|
|
|
@param model_dict: dictionary of model parameters
|
|
(keys = parameter names, values = parameter values)
|
|
@return numpy structured array of the model values.
|
|
"""
|
|
dt = cls.get_model_dtype(model_dict)
|
|
arr = np.zeros(1, dtype=dt)
|
|
for t in dt:
|
|
k = t[0]
|
|
arr[k] = model_dict[k]
|
|
return arr
|
|
|
|
def setup(self, size, domain, **kwargs):
|
|
"""
|
|
set up the population arrays seeded with previous results and the start model.
|
|
|
|
* set the population parameters and allocate the data arrays.
|
|
* set first particle to the initial guess.
|
|
* optionally, seed other particles with positions from a previous results file.
|
|
* randomize the remaining particles.
|
|
|
|
a seed file can be used to continue an interrupted optimization process.
|
|
see the description of the seed_from_file() method for details.
|
|
the seed_file and recalc_seed arguments of seed_from_file() are exposed as arguments,
|
|
the others are left at their defaults.
|
|
|
|
@param size: requested number of particles.
|
|
|
|
@param domain: definition of initial and limiting model parameters
|
|
expected by the cluster and parameters functions.
|
|
@arg domain.start: initial guess.
|
|
@arg domain.min: minimum values allowed.
|
|
@arg domain.max: maximum values allowed. if min == max, the parameter is kept constant.
|
|
@arg domain.step: depends on the actual algorithm.
|
|
not used in particle swarm.
|
|
standard deviation of mutations in genetic optimization.
|
|
|
|
the following arguments are keyword arguments that are passed to @ref seed_from_file.
|
|
see the description of the seed_from_file() method for details and additional parameters.
|
|
|
|
@param seed_file: path and name of the seed file.
|
|
if empty string, None or undefined, no seed file is loaded.
|
|
see the description of the seed_from_file() method for additional parameters.
|
|
|
|
@return: None
|
|
"""
|
|
self.size_req = size
|
|
self.size_act = size
|
|
self.model_start = domain.start
|
|
self.model_min = domain.min
|
|
self.model_max = domain.max
|
|
self.model_step = domain.step
|
|
self.model_start_array = self.get_model_array(domain.start)
|
|
self.model_min_array = self.get_model_array(domain.min)
|
|
self.model_max_array = self.get_model_array(domain.max)
|
|
self.model_step_array = self.get_model_array(domain.step)
|
|
|
|
# allocate arrays
|
|
dt = self.get_pop_dtype(self.model_start)
|
|
self.pos = np.zeros(self.size_req, dtype=dt)
|
|
self.vel = np.zeros(self.size_req, dtype=dt)
|
|
self.results = np.empty(0, dtype=dt)
|
|
self.pos_import = np.empty(0, dtype=dt)
|
|
|
|
# randomize population
|
|
self.generation = 0
|
|
self.randomize()
|
|
self.pos['_particle'] = np.arange(self.size_req)
|
|
self.pos['_gen'] = self.generation
|
|
self.pos['_model'] = np.arange(self.size_req)
|
|
self.pos['_rfac'] = 2.1
|
|
self.model_count = self.size_req
|
|
|
|
# add previous results
|
|
seed_file = kwargs.get('seed_file', '')
|
|
if seed_file:
|
|
kwargs['first_particle'] = 1
|
|
self.seed_from_file(**kwargs)
|
|
|
|
# seed first particle with start parameters
|
|
self.seed(self.model_start, index=0)
|
|
|
|
# initialize best array
|
|
self.best = self.pos.copy()
|
|
|
|
self._hold_once = True
|
|
|
|
def randomize(self, pos=True, vel=True):
|
|
"""
|
|
initializes a random population.
|
|
|
|
the position array is filled with random values (uniform distribution) from the parameter domain.
|
|
velocity values are randomly chosen between -1/8 to 1/8 times the width (max - min) of the parameter domain.
|
|
|
|
the method does not update the particle info fields.
|
|
|
|
@param pos: randomize positions. if False, the positions are not changed.
|
|
@param vel: randomize velocities. if False, the velocities are not changed.
|
|
"""
|
|
if pos:
|
|
for key in self.model_start:
|
|
self.pos[key] = ((self.model_max[key] - self.model_min[key]) *
|
|
np.random.random_sample(self.pos.shape) + self.model_min[key])
|
|
if vel:
|
|
for key in self.model_start:
|
|
self.vel[key] = ((self.model_max[key] - self.model_min[key]) *
|
|
(np.random.random_sample(self.pos.shape) - 0.5) / 4.0)
|
|
|
|
def seed(self, params, index=0):
|
|
"""
|
|
set the one of the particles to the specified seed values.
|
|
|
|
the method does not update the particle info fields.
|
|
|
|
@param params: dictionary of model parameters.
|
|
the keys must match the ones of domain.start.
|
|
values that lie outside of the domain are skipped.
|
|
|
|
@param index: index of the particle that is seeded.
|
|
the index must be in the allowed range of the self.pos array.
|
|
0 is the first, -1 the last particle.
|
|
"""
|
|
for key, val in params.items():
|
|
if self.model_min[key] <= val <= self.model_max[key]:
|
|
self.pos[key][index] = val
|
|
|
|
def seed_from_file(self, seed_file, **kwargs):
|
|
"""
|
|
seed the population with previous results.
|
|
|
|
a seed file can be used to continue an interrupted optimization process.
|
|
|
|
the method loads the results which are lower than a specified R-factor into the position array,
|
|
starting from particle 0 and up to a specified limit or the size of the current population array.
|
|
the results are sorted by R-factor.
|
|
|
|
the method also updates the control arrays and variables
|
|
so that the population can be advanced and calculated.
|
|
the velocity and best position arrays are not changed and must be initialized (e.g. randomized) separately.
|
|
|
|
by default, the control fields are set such that calculations of the seeded particles are repeated.
|
|
this is recommended whenever the code, the experimental input, or the project arguments change
|
|
because all of them may have an influence on the R-factor.
|
|
re-calculation can be turned off by setting recalc_seed to false.
|
|
this is recommended only if the calculation is a direct continuation of a previous one
|
|
without any changes to the code or input.
|
|
in that case, the previous results are marked as generation -1.
|
|
upon the first iteration before running the scattering calculations,
|
|
new parameters will be derived by the swarm algorithm.
|
|
|
|
this method is called as a part of setup().
|
|
it must not be called after the optimization has started.
|
|
|
|
parameter values that lie outside the parameter domain (min/max) are left at their previous value.
|
|
|
|
@note this method does not initialize the remaining particles.
|
|
neither does it set the velocity and best position arrays of the seeded particles.
|
|
this must be done separately, e.g. by calling randomize() beforehand.
|
|
|
|
@param seed_file: path and name of the seed file.
|
|
this can be a file created by the @ref save_array or @ref save_results methods.
|
|
the columns of the plain-text file contain model parameters and
|
|
the _rfac values of a previous calculation.
|
|
additional columns are ignored.
|
|
the first row must contain the column names.
|
|
if a parameter column is missing, the corresponding parameter is unchanged.
|
|
in this case, a warning is added to the log file.
|
|
|
|
the number of rows does not need to be equal to the population size.
|
|
if it is lower, the remaining particles are unchanged.
|
|
if it is higher, only the ones with the lowest R factors are used.
|
|
results with R > rfac_limit are ignored in any case.
|
|
|
|
the following arguments are keyword arguments.
|
|
|
|
@param seed_limit: maximum number of seeds to use from the file.
|
|
the actual number of seeds that are imported is the minimum of
|
|
the population size, the number of seeds that comply with the rfac_limit and the seed_limit.
|
|
if seed_limit is set to 0 (default), the seed_limit is not enforced.
|
|
|
|
@param rfac_limit: only seeds having an R-factor lower than or equal to this limit are used.
|
|
|
|
@param recalc_seed: select whether the R-factors of the seed models are calculated again.
|
|
this is useful if the seed data was calculated for a different cluster, different set of parameters,
|
|
or different experimental data, and if the R-factors of the new optimization may be systematically greater.
|
|
set this argument to False only if the calculation is a continuation of a previous one
|
|
without any changes to the code.
|
|
|
|
@param first_particle: zero-based index of the first particle that is seeded from the file.
|
|
|
|
@return: actual number of seeds imported.
|
|
"""
|
|
count_limit = kwargs.get('seed_limit', 0)
|
|
rfac_limit = kwargs.get('rfac_limit', 0.8)
|
|
recalc_seed = kwargs.get('recalc_seed', True)
|
|
first_particle = kwargs.get('first_particle', 0)
|
|
|
|
if count_limit < 1:
|
|
count_limit = self.pos.shape[0]
|
|
count_limit = min(count_limit, self.pos.shape[0] - first_particle)
|
|
|
|
seed = np.genfromtxt(seed_file, names=True)
|
|
try:
|
|
seed = seed[seed['_rfac'] <= rfac_limit]
|
|
except ValueError:
|
|
recalc_seed = True
|
|
logger.warning(BMsg("missing _rfac column in seed file {hf}. re-calculating.", hf=seed_file))
|
|
else:
|
|
seed.sort(order='_rfac')
|
|
seed_size = min(seed.shape[0], count_limit)
|
|
seed = seed[0:seed_size]
|
|
|
|
first = first_particle
|
|
last = first_particle + seed_size
|
|
|
|
source_fields = set(seed.dtype.names)
|
|
dest_fields = set(self.model_start.keys())
|
|
common_fields = source_fields & dest_fields
|
|
if len(common_fields) < len(dest_fields):
|
|
logger.warning(BMsg("missing columns in seed file {hf}.", hf=seed_file))
|
|
|
|
logger.warning(BMsg("seeding population with {hs} models from file {hf}.", hs=seed_size, hf=seed_file))
|
|
|
|
try:
|
|
self.pos['_rfac'][first:last] = seed['_rfac']
|
|
except ValueError:
|
|
self.pos['_rfac'][first:last] = 2.1
|
|
dest_index = np.arange(first, last)
|
|
for name in common_fields:
|
|
sel1 = np.less_equal(self.model_min[name], seed[name])
|
|
sel2 = np.greater_equal(self.model_max[name], seed[name])
|
|
sel = np.logical_and(sel1, sel2)
|
|
if not np.all(sel):
|
|
self.pos['_rfac'][dest_index[np.logical_not(sel)]] = 2.1
|
|
self.pos[name][dest_index[sel]] = seed[name][sel]
|
|
|
|
self.pos['_gen'][first:last] = self.generation
|
|
self.pos['_particle'][first:last] = np.arange(seed_size) + first
|
|
self.pos['_model'][first:last] = np.arange(seed_size) + first
|
|
if recalc_seed:
|
|
self.pos['_rfac'][first:last] = 2.1
|
|
logger.warning("models from seed file are re-calculated.")
|
|
else:
|
|
sel = self.pos['_rfac'][first:last] <= rfac_limit
|
|
self.pos['_gen'][dest_index[sel]] = -1
|
|
logger.warning(BMsg("{0} models from seed file are not re-calculated.", np.sum(sel)))
|
|
|
|
return seed_size
|
|
|
|
def patch_from_file(self, patch_file):
|
|
"""
|
|
patch the population with specific parameter values.
|
|
|
|
a patch file can be used to change one or multiple particles of a live population
|
|
during the optimization process.
|
|
|
|
the file must have the correct format for load_array(),
|
|
and contain as many rows as particles to be patched.
|
|
the '_particle' field determines the destination particles.
|
|
the parameter columns don't need to be complete.
|
|
parameters of missing columns remain unchanged.
|
|
'_gen', '_rfac', '_model' are not used.
|
|
|
|
this method loads the patch file.
|
|
however, the patch is applied only upon the next execution of advance_population().
|
|
|
|
an info or warning message is printed to the log
|
|
depending on whether the filed contained a complete dataset or not.
|
|
|
|
@attention patching a live population is a potentially dangerous operation.
|
|
it may cause an optimization to abort because of an error in the file.
|
|
this method does not handle exceptions coming from numpy.genfromtxt
|
|
such as missing file (IOError) or conversion errors (ValueError).
|
|
exception handling should be done by the owner of the population (typically the model handler).
|
|
patch values that lie outside the population domain aresilently ignored.
|
|
|
|
@param patch_file: path and name of the patch file.
|
|
the file must have the correct format for load_array(),
|
|
|
|
@return: None
|
|
|
|
@raise IOError if the file doesn't exist.
|
|
|
|
@raise ValueError for conversion errors.
|
|
"""
|
|
self.pos_patch = np.genfromtxt(patch_file, names=True)
|
|
source_fields = set(self.pos_patch.dtype.names)
|
|
dest_fields = set(self.model_start.keys())
|
|
common_fields = source_fields & dest_fields
|
|
if len(common_fields) == 0 or self.pos_patch.shape[0] == 0 or '_particle' not in source_fields:
|
|
self.pos_patch = None
|
|
logger.warning(BMsg("patch file {pf} contains no data.", pf=patch_file))
|
|
if len(common_fields) < len(dest_fields):
|
|
logger.warning(BMsg("loaded patch file {pf}. some columns are missing.", pf=patch_file))
|
|
else:
|
|
logger.warning(BMsg("loaded patch file {pf}.", pf=patch_file))
|
|
|
|
def _apply_patch(self):
|
|
"""
|
|
patch the positions array with specific parameter values.
|
|
|
|
this method should be called by advance_population().
|
|
|
|
the method overwrites only parameter values, not control variables.
|
|
_particle indices that lie outside the range of available population items are ignored.
|
|
parameter values that lie outside the parameter domain (min/max) are ignored.
|
|
"""
|
|
if self.pos_patch is not None:
|
|
logger.warning(BMsg("patching generation {gen} with new positions.", gen=self.generation))
|
|
source_fields = set(self.pos_patch.dtype.names)
|
|
dest_fields = set(self.model_start.keys())
|
|
common_fields = source_fields & dest_fields
|
|
for particle in self.pos_patch:
|
|
particle_index = int(particle['_particle'])
|
|
if 0 <= particle_index < self.pos.shape[0]:
|
|
for name in common_fields:
|
|
if self.model_min[name] <= particle[name] <= self.model_max[name]:
|
|
self.pos[particle_index][name] = particle[name]
|
|
self.pos_patch = None
|
|
|
|
def clear_import(self):
|
|
"""
|
|
clear the pos_import array.
|
|
|
|
resizes the array to size zero.
|
|
|
|
@return: None
|
|
"""
|
|
self.pos_import = np.resize(self.pos_import, 0)
|
|
|
|
def import_positions(self, source, timeout=10):
|
|
"""
|
|
import one or more positions from any data source.
|
|
|
|
the data source must provide all regular parameters of each model.
|
|
special parameters are ignored.
|
|
|
|
the positions are added to the internal self.pos_import.
|
|
|
|
@param source: specifies the data source. can be one of the following:
|
|
@arg (numpy.ndarray or numpy.void): structured array that can be added to self.positions,
|
|
having at least the same columns of regular model parameters.
|
|
@arg (sequence of dict, numpy.ndarray, numpy.void, named tuple):
|
|
each element must be syntactically compatible with a dict
|
|
that holds the model parameters.
|
|
@arg (str): file name that contains a table in the same format as self.save_array produces.
|
|
if an IOError occurs, the method retries in one-second intervals up to timeout.
|
|
@arg None: stop polling the data source (with callable source).
|
|
@arg (callable): a function that returns one of the above objects.
|
|
|
|
@param timeout: timeout for reading from a file in seconds.
|
|
|
|
@return (int) number of positions imported
|
|
"""
|
|
timeout = max(int(timeout), 1)
|
|
if source is None:
|
|
return 0
|
|
elif isinstance(source, np.ndarray):
|
|
return self.import_positions_array(source)
|
|
elif isinstance(source, np.void):
|
|
array = np.zeros(1, dtype=source.dtype)
|
|
array[0] = source
|
|
return self.import_positions(array)
|
|
elif isinstance(source, dict):
|
|
dtype = self.get_model_dtype(self.model_start)
|
|
row = [source[key] for key in self.model_start]
|
|
array = np.array([tuple(row)], dtype=dtype)
|
|
return self.import_positions(array)
|
|
elif isinstance(source, str):
|
|
for i in range(timeout):
|
|
try:
|
|
array = np.genfromtxt(source, names=True)
|
|
except IOError:
|
|
time.sleep(1)
|
|
else:
|
|
return self.import_positions(array)
|
|
return 0
|
|
elif callable(source):
|
|
return self.import_positions(source())
|
|
else:
|
|
count = 0
|
|
for item in source:
|
|
count += self.import_positions(item)
|
|
return count
|
|
|
|
def import_positions_array(self, source):
|
|
"""
|
|
import one or more positions from a numpy array.
|
|
|
|
@param source: (numpy.ndarray, same dtype as self.pos)
|
|
@param dest: (numpy.ndarray, same dtype as self.pos)
|
|
@return (int) number of positions imported
|
|
"""
|
|
source_fields = set(source.dtype.names)
|
|
dest_fields = set(self.model_start.keys())
|
|
common_fields = source_fields & dest_fields
|
|
if len(common_fields) < len(dest_fields):
|
|
raise ValueError(BMsg("missing columns during data import."))
|
|
|
|
count = source.shape[0]
|
|
first = self.pos_import.shape[0]
|
|
last = first + count
|
|
self.pos_import = np.resize(self.pos_import, last)
|
|
for name in common_fields:
|
|
self.pos_import[name][first:last] = source[name]
|
|
return count
|
|
|
|
def advance_from_import(self):
|
|
"""
|
|
advance the population according to imported positions.
|
|
|
|
the positions to import must be in the import queue @ref self.pos_import.
|
|
the number of particles imported is the lower of the queue size and specified population size.
|
|
if the import queue is shorter, the population size self.size_act is reduced
|
|
for the current generation.
|
|
it may grow again in the next generation if the import queue is long enough.
|
|
|
|
the used particles are removed from the import queue.
|
|
|
|
the method also performs a range check.
|
|
the parameter values are constrained according to self.position_constrain_mode
|
|
and the parameter domain self.model_min and self.model_max.
|
|
if the constrain mode is `error`, models that violate the constraints are ignored
|
|
and removed from the import queue.
|
|
|
|
@return (int) number of imported particles
|
|
"""
|
|
|
|
# constrain positions
|
|
index = 0
|
|
while index < min(self.pos.shape[0], self.pos_import.shape[0]):
|
|
pos0 = self.pos[index]
|
|
pos1 = self.pos_import[index]
|
|
for key in self.model_start:
|
|
vel = pos1[key] - pos0[key]
|
|
try:
|
|
pos1[key], vel, self.model_min[key], self.model_max[key] = \
|
|
self.constrain_position(pos1[key], vel, self.model_min[key], self.model_max[key],
|
|
self.position_constrain_mode)
|
|
except ValueError:
|
|
self.pos_import = np.delete(self.pos_import, index)
|
|
index -= 1
|
|
break
|
|
index += 1
|
|
|
|
# de-duplicate
|
|
index = 0
|
|
while index < self.pos_import.shape[0]:
|
|
pos0 = self.pos_import[index]
|
|
try:
|
|
dup = self.find_model(pos0, self.results) >= 0
|
|
except ValueError:
|
|
dup = False
|
|
try:
|
|
dup = dup or self.find_model(pos0, self.pos) >= 0
|
|
except ValueError:
|
|
pass
|
|
try:
|
|
dup = dup or 0 <= self.find_model(pos0, self.pos_import) < index
|
|
except ValueError:
|
|
pass
|
|
if dup:
|
|
self.pos_import = np.delete(self.pos_import, index)
|
|
else:
|
|
index += 1
|
|
|
|
# copy data
|
|
first = 0
|
|
last = min(self.pos.shape[0], self.pos_import.shape[0])
|
|
if last > first:
|
|
self.pos[first:last] = self.pos_import[first:last]
|
|
self.pos_import = np.delete(self.pos_import, range(first, last))
|
|
self.size_act = last - first
|
|
self.update_particle_info()
|
|
return last - first
|
|
|
|
def update_particle_info(self, index=None, inc_model=True):
|
|
"""
|
|
set the internal particle info fields.
|
|
|
|
the method updates the special info fields of the specified particle or the whole array:
|
|
@arg `_particle` is the array index,
|
|
@arg `_gen` is the current generation number from self.generation,
|
|
@arg `_model` is the incremental model number from self.model_count, and
|
|
@arg `_rfac` is set to the default value 2.1.
|
|
|
|
this method must be called at the end of the advance_population method
|
|
so that the results can be related to the original particle after the calculation.
|
|
|
|
@param index: (int) index of the particle in the self.pos array to update.
|
|
if None, the first self.size_act items of the array are updated.
|
|
|
|
@param inc_model: (bool) if True, increment the self.model_count after each assignment.
|
|
|
|
@return: None
|
|
"""
|
|
if index is None:
|
|
first = 0
|
|
last = self.size_act
|
|
else:
|
|
first = index
|
|
last = index + 1
|
|
for index in range(first, last):
|
|
particle = self.pos[index]
|
|
particle['_particle'] = index
|
|
particle['_gen'] = self.generation
|
|
particle['_model'] = self.model_count
|
|
particle['_rfac'] = 2.1
|
|
if inc_model:
|
|
self.model_count += 1
|
|
|
|
def advance_population(self):
|
|
"""
|
|
advance the population by one generation.
|
|
|
|
this method must be implemented in sub-classes.
|
|
the sub-class should call the inherited method or at least reset _hold_once.
|
|
|
|
@return: None
|
|
"""
|
|
self._apply_patch()
|
|
self._hold_once = False
|
|
|
|
@staticmethod
|
|
def constrain_velocity(_pos, _vel, _min, _max, _mode='default'):
|
|
"""
|
|
constrain a velocity to the given bounds.
|
|
|
|
@param _pos: current position of the particle.
|
|
|
|
@param _vel: new velocity of the particle, i.e. distance to move.
|
|
|
|
@param _min: lower position boundary.
|
|
|
|
@param _max: upper position boundary.
|
|
_max must be greater or equal to _min.
|
|
|
|
@param _mode: what to do if a boundary constraint is violated.
|
|
reserved for future use. should be set to 'default'.
|
|
|
|
@return: tuple (new position, new velocity, new lower boundary, new upper boundary).
|
|
in the current implementation only the velocity may change.
|
|
however, in future versions any of these values may change.
|
|
"""
|
|
d = abs(_max - _min) / 2.0
|
|
if d > 0.0:
|
|
while abs(_vel) >= d:
|
|
_vel /= 2.0
|
|
else:
|
|
_vel = 0.0
|
|
return _pos, _vel, _min, _max
|
|
|
|
@staticmethod
|
|
def constrain_position(_pos, _vel, _min, _max, _mode='default'):
|
|
"""
|
|
constrain a position to the given bounds.
|
|
|
|
this method resolves violations of parameter boundaries, i.e. when a particle is leaving the designated domain.
|
|
if a violation is detected, the method calculates an updated position inside the domain
|
|
according to the selected algorithm.
|
|
in some cases the velocity or boundaries have to be updated as well.
|
|
|
|
the method distinguishes overshoot and undershoot violations.
|
|
overshoot is the normal case when the particle is leaving the domain.
|
|
it is handled according to the selected algorithm.
|
|
|
|
undershoot is a special case where the particle was outside the boundaries before the move.
|
|
this case can occur in the beginning if the population is seeded with out-of-bounds values.
|
|
undershoot is always handled by placing the particle at a random position in the domain
|
|
regardless of the chosen constraint mode.
|
|
|
|
@note it is important to avoid bias while handling constraint violations.
|
|
else the swarm algorithm may become unstable or repetitive.
|
|
the bounce mode has given good results in the author's projects.
|
|
however, situations where the bounce mode is unsatisfactory cannot be excluded.
|
|
in those cases, the random mode should be resorted to.
|
|
the other modes are likely to introduce too much bias or unstability.
|
|
|
|
@param _pos: new position of the particle, possible out of bounds.
|
|
|
|
@param _vel: velocity of the particle, i.e. distance from the previous position.
|
|
abs(_vel) must be lower than _max - _min.
|
|
|
|
@param _min: lower boundary.
|
|
|
|
@param _max: upper boundary.
|
|
_max must be greater or equal to _min.
|
|
|
|
@param _mode: what to do if a boundary constraint is violated:
|
|
@arg 're-enter': re-enter from the opposite side of the parameter interval.
|
|
@arg 'bounce': fold the motion vector at the boundary and move the particle back into the domain.
|
|
@arg 'scatter': place the particle at a random place between its old position and the violated boundary.
|
|
@arg 'stick': place the particle at the violated boundary.
|
|
@arg 'expand': move the boundary so that the particle fits.
|
|
@arg 'random': place the particle at a random position between the lower and upper boundaries.
|
|
@arg 'default': the default mode is 'bounce'. this may change in future versions.
|
|
@arg 'error': raise a ValueError.
|
|
|
|
@return tuple (new position, new velocity, new lower boundary, new upper boundary).
|
|
depending on the mode, any of these values may change.
|
|
the value of velocity is preserved but its sign may change.
|
|
"""
|
|
_rng = max(_max - _min, 0.0)
|
|
_old = _pos - _vel
|
|
|
|
# prevent undershoot
|
|
if _vel > 0.0 and _pos < _min:
|
|
_mode = 'random'
|
|
if _vel < 0.0 and _pos > _max:
|
|
_mode = 'random'
|
|
|
|
if _rng > 0.0:
|
|
while _pos > _max:
|
|
if _mode == 're-enter':
|
|
_pos -= _rng
|
|
elif _mode == 'bounce' or _mode == 'default':
|
|
_pos = _max - (_pos - _max)
|
|
_vel = -_vel
|
|
elif _mode == 'scatter':
|
|
_pos = _old + (_max - _old) * np.random.random()
|
|
elif _mode == 'stick':
|
|
_vel = _max - (_pos - _vel)
|
|
_pos = _max
|
|
elif _mode == 'expand':
|
|
_max = _pos
|
|
elif _mode == 'random':
|
|
_pos = _min + _rng * np.random.random()
|
|
elif _mode == 'error':
|
|
raise ValueError(BMsg('position out of range ({0} > {1}).', _pos, _max))
|
|
else:
|
|
raise ValueError('invalid constrain mode')
|
|
|
|
while _pos < _min:
|
|
if _mode == 're-enter':
|
|
_pos += _rng
|
|
elif _mode == 'bounce' or _mode == 'default':
|
|
_pos = _min - (_pos - _min)
|
|
_vel = -_vel
|
|
elif _mode == 'scatter':
|
|
_pos = _old + (_min - _old) * np.random.random()
|
|
elif _mode == 'stick':
|
|
_vel = _min - (_pos - _vel)
|
|
_pos = _min
|
|
elif _mode == 'expand':
|
|
_min = _pos
|
|
elif _mode == 'random':
|
|
_pos = _min + _rng * np.random.random()
|
|
elif _mode == 'error':
|
|
raise ValueError(BMsg('position out of range ({0} < {1}).', _pos, _min))
|
|
else:
|
|
raise ValueError('invalid constrain mode')
|
|
else:
|
|
_pos = _max
|
|
_vel = 0.0
|
|
|
|
return _pos, _vel, _min, _max
|
|
|
|
def add_result(self, particle, rfac):
|
|
"""
|
|
add a calculation particle to the results array, and update the best fit array.
|
|
|
|
@param particle: dictionary of model parameters and particle values.
|
|
the keys must correspond to the columns of the pos array,
|
|
i.e. the names of the model parameters plus the _rfac, _particle, and _model fields.
|
|
|
|
@param rfac: calculated R-factor.
|
|
the R-factor is written to the '_rfac' field.
|
|
|
|
@return better (bool): True if the new R-factor is better than the particle's previous best mark.
|
|
"""
|
|
particle['_rfac'] = rfac
|
|
l = [particle[n] for n in self.results.dtype.names]
|
|
t = tuple(l)
|
|
a = np.asarray(t, dtype=self.results.dtype)
|
|
self.results = np.append(self.results, a)
|
|
index = particle['_particle']
|
|
better = particle['_rfac'] < self.best['_rfac'][index]
|
|
if better:
|
|
self.best[index] = a
|
|
|
|
return better
|
|
|
|
def find_model(self, model, search_array=None, precision=1e-6):
|
|
"""
|
|
find a specific model in a population-like array.
|
|
|
|
@param model: dictionary or numpy.void array element of model parameters.
|
|
the keys must correspond to columns of the self.results array.
|
|
all values must match up to the given precision.
|
|
it is not necessary to specify all columns.
|
|
special columns starting with an underscore are ignored.
|
|
|
|
@param search_array: population-like numpy structured array to search for the model.
|
|
defaults to self.results if None.
|
|
|
|
@param precision: precision relative to model domain at which elements should be considered equal.
|
|
|
|
@return index of the first occurrence.
|
|
|
|
@raise ValueError if model is not found.
|
|
"""
|
|
if isinstance(model, np.void):
|
|
names = [name for name in model.dtype.names if name[0] != '_']
|
|
model_tuple = tuple([model[name] for name in names])
|
|
elif isinstance(model, dict):
|
|
names = [name for name in model.keys() if name[0] != '_']
|
|
model_tuple = tuple([model[name] for name in names])
|
|
else:
|
|
raise TypeError("unsupported model argument type")
|
|
|
|
# rewrite model, tolerance and results as two-dimensional array
|
|
if search_array is None:
|
|
search_array = self.results
|
|
results = np.empty((search_array.shape[0], len(names)))
|
|
for col, name in enumerate(names):
|
|
results[:, col] = search_array[name]
|
|
model = np.asarray(model_tuple, results.dtype)
|
|
tol = np.asarray([max(abs(self.model_max[name]), abs(self.model_min[name]), precision)
|
|
for name in names])
|
|
tol *= precision
|
|
|
|
diffs = np.abs((results - model) / tol)
|
|
norms = np.sum(diffs, axis=1)
|
|
idx = np.argmin(norms)
|
|
|
|
if norms[idx] < len(names):
|
|
return idx
|
|
else:
|
|
raise ValueError("requested model not found in results array")
|
|
|
|
def is_converged(self, tol=0.01):
|
|
"""
|
|
check whether the population has converged.
|
|
|
|
convergence is reached when the R-factors of the N latest results,
|
|
do not vary more than tol, where N is the size of the population.
|
|
|
|
@param tol: max. difference allowed between greatest and lowest value of the R factor in the population.
|
|
"""
|
|
nres = self.results.shape[0]
|
|
npop = self.pos.shape[0]
|
|
if nres >= npop:
|
|
rfac1 = np.min(self.results['_rfac'][-npop:])
|
|
rfac2 = np.max(self.results['_rfac'][-npop:])
|
|
converg = rfac2 - rfac1 < tol
|
|
return converg
|
|
else:
|
|
return False
|
|
|
|
def save_array(self, filename, array):
|
|
"""
|
|
save a population array to a text file.
|
|
|
|
the columns are space-delimited.
|
|
the first line contains the column names.
|
|
|
|
@param filename: name of destination file, optionally including a path.
|
|
|
|
@param array: population array to save.
|
|
must be one of self.pos, self.vel, self.best, self.results
|
|
"""
|
|
header = " ".join(array.dtype.names)
|
|
np.savetxt(filename, array, fmt='%g', header=header)
|
|
|
|
def load_array(self, filename, array):
|
|
"""
|
|
load a population array from a text file.
|
|
|
|
the array to load must be compatible with the current population
|
|
(same number of rows, same columns).
|
|
the first row must contain column names.
|
|
the ordering of columns may be different.
|
|
the returned array is ordered according to the array argument.
|
|
|
|
@param filename: name of source file, optionally including a path.
|
|
|
|
@param array: population array to load.
|
|
must be one of self.pos, self.vel, self.results.
|
|
|
|
@return array with loaded data.
|
|
this may be the same instance as on input.
|
|
|
|
@raise AssertionError if the number of rows of the two files differ.
|
|
"""
|
|
data = np.genfromtxt(filename, names=True)
|
|
assert data.shape == array.shape
|
|
for name in data.dtype.names:
|
|
array[name] = data[name]
|
|
return array
|
|
|
|
def save_population(self, base_filename):
|
|
"""
|
|
save the population array to a set of text files.
|
|
|
|
the file name extensions are .pos, .vel, and .best
|
|
"""
|
|
self.save_array(base_filename + ".pos", self.pos)
|
|
self.save_array(base_filename + ".vel", self.vel)
|
|
self.save_array(base_filename + ".best", self.best)
|
|
|
|
def load_population(self, base_filename):
|
|
"""
|
|
load the population array from a set of previously saved text files.
|
|
this can be used to continue an optimization job.
|
|
|
|
the file name extensions are .pos, .vel, and .best.
|
|
the files must have the same format as produced by save_population.
|
|
the files must have the same number of rows.
|
|
"""
|
|
self.pos = self.load_array(base_filename + ".pos", self.pos)
|
|
self.vel = self.load_array(base_filename + ".vel", self.vel)
|
|
self.best = self.load_array(base_filename + ".best", self.best)
|
|
self.size_act = self.pos.shape[0]
|
|
|
|
def save_results(self, filename):
|
|
"""
|
|
saves the complete list of calculations results.
|
|
"""
|
|
self.save_array(filename, self.results)
|
|
|
|
|
|
class PopulationHandler(handlers.ModelHandler):
|
|
"""
|
|
base class for population-based optimization handlers.
|
|
|
|
"""
|
|
|
|
## @var _pop (Population)
|
|
# holds the population object.
|
|
|
|
## @var _pop_size (int)
|
|
# number of particles in the swarm.
|
|
|
|
## @var _outfile (file)
|
|
# output file for model parametes and R factor.
|
|
# the file is open during calculations.
|
|
# each calculation result adds one line.
|
|
|
|
## @var _model_time (timedelta)
|
|
# estimated CPU time to calculate one model.
|
|
# this value is the maximum time measured of the completed calculations.
|
|
# it is used to determine when the optimization should be finished so that the time limit is not exceeded.
|
|
|
|
## @var _converged (bool)
|
|
# indicates that the population has converged.
|
|
# convergence is detected by calling Population.is_converged().
|
|
# once convergence has been reached, this flag is set, and further convergence tests are skipped.
|
|
|
|
## @var _timeout (bool)
|
|
# indicates when the handler has run out of time,
|
|
# i.e. time is up before convergence has been reached.
|
|
# if _timeout is True, create_tasks() will not create further tasks,
|
|
# and add_result() will signal completion when the _pending_tasks queue becomes empty.
|
|
|
|
## @var _invalid_limit (int)
|
|
# maximum tolerated number of invalid calculations.
|
|
#
|
|
# if the number of invalid calculations (self._invalid_count) exceeds this limit,
|
|
# the optimization is aborted.
|
|
# the variable is initialized by self.setup() to 10 times the population size.
|
|
|
|
## @var patch_file (str)
|
|
# name of the patch file.
|
|
#
|
|
# if this file exists in the working directory and is newer than the previously loaded file,
|
|
# it is used to patch a living population.
|
|
# see the documentation of @ref pmsco.population.Population.
|
|
#
|
|
# the default file name is "pmsco_patch.pop"
|
|
|
|
def __init__(self):
|
|
super(PopulationHandler, self).__init__()
|
|
self._pop = None
|
|
self._pop_size = 0
|
|
self._model_time = datetime.timedelta()
|
|
self._converged = False
|
|
self._timeout = False
|
|
self._invalid_limit = 10
|
|
self.patch_file = "pmsco_patch.pop"
|
|
self._patch_last_mtime = 0
|
|
|
|
def setup(self, project, slots):
|
|
"""
|
|
initialize the particle swarm and open an output file.
|
|
|
|
the population size is set to `project.optimizer_params['pop_size']`
|
|
if it is defined and greater than 4.
|
|
otherwise, it defaults to `max(slots, 4)`.
|
|
|
|
for good efficiency the population size (number of particles) should be
|
|
greater or equal to the number of available processing slots,
|
|
otherwise the next generation is created before all particles have been calculated
|
|
which may slow down convergence.
|
|
|
|
if calculations take a long time compared to the available computation time
|
|
or spawn a lot of sub-tasks due to complex symmetry,
|
|
and you prefer to allow for a good number of generations,
|
|
you should override the population size.
|
|
|
|
@param project: project instance.
|
|
|
|
@param slots: number of calculation processes available through MPI.
|
|
|
|
@return: None
|
|
"""
|
|
super(PopulationHandler, self).setup(project, slots)
|
|
|
|
_min_size = 4
|
|
_def_size = self._slots
|
|
_req_size = project.optimizer_params.get('pop_size', 0)
|
|
self._pop_size = _req_size if _req_size >= _min_size else _def_size
|
|
self.setup_population()
|
|
self._invalid_limit = self._pop_size * 10
|
|
|
|
with open(self._project.output_file + ".dat", "w") as outfile:
|
|
outfile.write("# ")
|
|
outfile.write(" ".join(self._pop.results.dtype.names))
|
|
outfile.write("\n")
|
|
|
|
return None
|
|
|
|
def setup_population(self):
|
|
self._pop.setup(self._pop_size, self._project.create_domain(), **self._project.optimizer_params)
|
|
|
|
def cleanup(self):
|
|
super(PopulationHandler, self).cleanup()
|
|
|
|
def create_tasks(self, parent_task):
|
|
"""
|
|
develop the particle population and create a calculation task per particle.
|
|
|
|
this method advances the population by one step.
|
|
it generates one task for each particle if its model number is positive.
|
|
negative model numbers indicate that the particle is used for seeding
|
|
and does not need to be calculated in the first generation.
|
|
|
|
if the time limit is approaching, no new tasks are created.
|
|
|
|
the process loop calls this method every time the length of the task queue drops
|
|
below the number of calculation processes (slots).
|
|
this means in particular that a population will not be completely calculated
|
|
before the next generation starts.
|
|
for efficiency reasons, we do not wait until a population is complete.
|
|
this will cause a certain mixing of generations and slow down convergence
|
|
because the best peer position in the generation may not be known yet.
|
|
the effect can be reduced by making the population larger than the number of processes.
|
|
|
|
the created tasks are returned as the function result and added to self._pending_tasks.
|
|
|
|
@return list of generated tasks.
|
|
empty list if the optimization has converged (see Population.is_converged())
|
|
or if the time limit is approaching.
|
|
"""
|
|
|
|
super(PopulationHandler, self).create_tasks(parent_task)
|
|
|
|
# this is the top-level handler, we expect just one parent: root.
|
|
parent_id = parent_task.id
|
|
assert parent_id == (-1, -1, -1, -1, -1)
|
|
self._parent_tasks[parent_id] = parent_task
|
|
|
|
time_pending = self._model_time * len(self._pending_tasks)
|
|
time_avail = (self.datetime_limit - datetime.datetime.now()) * max(self._slots, 1)
|
|
|
|
new_tasks = []
|
|
if not self._timeout and not self._converged:
|
|
self._check_patch_file()
|
|
self._pop.advance_population()
|
|
|
|
for pos in self._pop.pos_gen():
|
|
time_pending += self._model_time
|
|
if time_pending > time_avail:
|
|
self._timeout = True
|
|
logger.warning("time limit reached")
|
|
new_tasks = []
|
|
break
|
|
|
|
if pos['_gen'] >= 0:
|
|
new_task = parent_task.copy()
|
|
new_task.parent_id = parent_id
|
|
new_task.model = pos
|
|
new_task.change_id(model=pos['_model'])
|
|
new_tasks.append(new_task)
|
|
|
|
for task in new_tasks:
|
|
self._pending_tasks[task.id] = task
|
|
|
|
return new_tasks
|
|
|
|
def _check_patch_file(self):
|
|
"""
|
|
load a patch file to modify the population during the calculation.
|
|
|
|
the name of the patch file is given by the patch_file attribute ("pmsco_patch.pop" by default).
|
|
if the file exists and is newer than a previously loaded version,
|
|
it is loaded into the patch buffer of the population and applied during the next evolution step.
|
|
|
|
@return: None
|
|
"""
|
|
try:
|
|
mtime = os.path.getmtime(self.patch_file)
|
|
except OSError:
|
|
mtime = 0
|
|
|
|
if mtime > self._patch_last_mtime:
|
|
self._patch_last_mtime = mtime + 1
|
|
try:
|
|
self._pop.patch_from_file(self.patch_file)
|
|
except IOError:
|
|
pass
|
|
except ValueError:
|
|
pass
|
|
|
|
def add_result(self, task):
|
|
"""
|
|
calculate the R factor of the result and add it to the results list of the population.
|
|
|
|
* save the current population.
|
|
* append the result to the result output file.
|
|
* update the execution time statistics.
|
|
* remove temporary files if requested.
|
|
* check whether the population has converged.
|
|
|
|
@return parent task (CalculationTask) if the optimization has converged, @c None otherwise.
|
|
"""
|
|
super(PopulationHandler, self).add_result(task)
|
|
|
|
self._complete_tasks[task.id] = task
|
|
del self._pending_tasks[task.id]
|
|
parent_task = self._parent_tasks[task.parent_id]
|
|
|
|
if task.result_valid:
|
|
assert not math.isnan(task.rfac)
|
|
task.model['_rfac'] = task.rfac
|
|
self._pop.add_result(task.model, task.rfac)
|
|
self._pop.save_population(self._project.output_file + ".pop")
|
|
|
|
with open(self._project.output_file + ".dat", "a") as outfile:
|
|
s = (str(task.model[name]) for name in self._pop.results.dtype.names)
|
|
outfile.write(" ".join(s))
|
|
outfile.write("\n")
|
|
|
|
self._project.files.update_model_rfac(task.id.model, task.rfac)
|
|
self._project.files.set_model_complete(task.id.model, True)
|
|
|
|
if task.result_valid:
|
|
if self._pop.is_converged() and not self._converged:
|
|
logger.warning("population converged")
|
|
self._converged = True
|
|
|
|
if task.time > self._model_time:
|
|
self._model_time = task.time
|
|
else:
|
|
self._invalid_count += 1
|
|
if self._invalid_count >= self._invalid_limit:
|
|
logger.error("number of invalid calculations (%u) exceeds limit", self._invalid_count)
|
|
self._converged = True
|
|
|
|
# optimization complete?
|
|
if (self._timeout or self._converged) and len(self._pending_tasks) == 0:
|
|
del self._parent_tasks[parent_task.id]
|
|
else:
|
|
parent_task = None
|
|
|
|
self.cleanup_files(keep=self._pop_size)
|
|
return parent_task
|
|
|
|
def save_report(self, root_task):
|
|
"""
|
|
generate a graphical summary of the optimization.
|
|
|
|
@param root_task: (CalculationTask) the id.model attribute is used to register the generated files.
|
|
|
|
@return: None
|
|
"""
|
|
super(PopulationHandler, self).save_report(root_task)
|
|
|
|
files = grfactor.render_results(self._project.output_file + ".dat", self._pop.results)
|
|
for f in files:
|
|
self._project.files.add_file(f, root_task.id.model, "report")
|