pmsco-public/pmsco/swarm.py
"""
@package pmsco.swarm
particle swarm optimization handler.
the module starts multiple MSC calculations and optimizes the model parameters
according to the particle swarm optimization algorithm.
Particle swarm optimization adapted from
D. A. Duncan et al., Surface Science 606, 278 (2012)
@author Matthias Muntwiler, matthias.muntwiler@psi.ch
@copyright (c) 2015 by Paul Scherrer Institut @n
Licensed under the Apache License, Version 2.0 (the "License"); @n
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
"""
from __future__ import division
import copy
import os
import datetime
import logging
import numpy as np
import handlers
from helpers import BraceMessage as BMsg
logger = logging.getLogger(__name__)
CONSTRAIN_MODES = {'re-enter', 'bounce', 'scatter', 'stick', 'expand', 'random'}
class Population(object):
"""
particle swarm population.
"""
## @var size_req
# requested number of particles.
# read-only. call setup() to change this attribute.
## @var model_start
# (dict) initial model parameters.
# read-only. call setup() to change this attribute.
## @var model_min
# (dict) low limits of the model parameters.
# read-only. call setup() to change this attribute.
## @var model_max
# (dict) high limits of the model parameters.
# if min == max, the parameter is kept constant.
# read-only. call setup() to change this attribute.
## @var model_step
# (dict) initial velocity (difference between two steps) of the particle.
# read-only. call setup() to change this attribute.
## @var friends
# number of other particles that each particle consults for the global best fit.
# default = 3.
## @var momentum
# momentum of the particle.
# default = 0.689343.
## @var attract_local
# preference for returning to the local best fit
# default = 1.92694.
## @var attract_global
# preference for heading towards the global best fit.
# default = 1.92694
## @var generation
# generation number. the counter is incremented by advance_population().
# initial value = 0.
## @var model_count
# model number.
# the counter is incremented by advance_particle() each time a particle position is changed.
# initial value = 0.
## @var pos
# (numpy.ndarray) current positions of each particle.
#
# the column names include the names of the model parameters, taken from domain.start,
# and the special names @c '_particle', @c '_model', @c '_rfac'.
# the special fields have the following meanings:
#
# * @c '_particle': index of the particle in the array.
# the particle index is used to match a calculation result and its original particle.
# it must be preserved during the calculation process.
#
# * @c '_gen': generation number.
# the generation number counts the number of calls to advance_population().
# this field is not used internally.
# the first population is generation 0.
#
# * @c '_model': model number.
# the model number counts the number of calls to advance_particle().
# the field is filled with the current value of model_count whenever the position is changed.
# this field is not used internally.
# the model handlers use it to derive their model ID.
#
# * @c '_rfac': calculated R-factor for this position.
# this field is meaningful in the best and results arrays only
# where it is set by the add_result() method.
# in the pos and vel arrays, the field value is arbitrary.
#
# @note if you read a single element, e.g. pos[0], from the array, you will get a numpy.void object.
# this object is a <em>view</em> of the original array item:
# writing to its fields modifies the underlying array.
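#
# a minimal read-access sketch (the 'dist' parameter name is hypothetical):
# @code{.py}
# row = pop.pos[0]
# print row['dist'], row['_particle'], row['_rfac']
# @endcode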
## @var vel
# (numpy.ndarray) the current velocities of each particle.
# the structure is the same as for the pos array.
## @var best
# (numpy.ndarray) best positions found by each particle so far.
# the structure is the same as for the pos array.
## @var results
# (numpy.ndarray) all positions and resulting R-factors calculated.
# the structure is the same as for the pos array.
## @var _hold_once
# (bool) hold the population once during the next update.
# if _hold_once is True, advance_population() will skip the update process once.
# this flag is set by setup() because it sets up a valid initial population.
# the caller then doesn't have to care whether to skip advance_population() after setup.
def __init__(self):
"""
initialize the population object.
"""
self.size_req = 0
self.model_start = {}
self.model_min = {}
self.model_max = {}
self.model_step = {}
self.friends = 3
self.momentum = 0.689343
self.attract_local = 1.92694
self.attract_global = 1.92694
self.position_constrain_mode = 'default'
self.velocity_constrain_mode = 'default'
self.generation = 0
self.model_count = 0
self._hold_once = False
self.pos = None
self.vel = None
self.best = None
self.results = None
def pos_gen(self):
"""
generator for dictionaries of the pos array.
the generator can be used to loop over the array.
on each iteration, it yields a dictionary of the position at the current index.
for example,
@code{.py}
for pos in pop.pos_gen():
print pos['_particle'], pos['_rfac']
@endcode
"""
return ({name: pos[name] for name in pos.dtype.names} for pos in self.pos)
def vel_gen(self):
"""
generator for dictionaries of the vel array.
@see pos_gen() for details.
"""
return ({name: vel[name] for name in vel.dtype.names} for vel in self.vel)
def best_gen(self):
"""
generator for dictionaries of the best array.
@see pos_gen() for details.
"""
return ({name: best[name] for name in best.dtype.names} for best in self.best)
def results_gen(self):
"""
generator for dictionaries of the results array.
@see pos_gen() for details.
"""
return ({name: results[name] for name in results.dtype.names} for results in self.results)
@staticmethod
def get_model_dtype(model_params):
"""
get numpy array data type for model parameters and swarm control variables.
@param model_params: dictionary of model parameters or list of parameter names.
@return: dtype for use with numpy array constructors.
this is a sorted list of (name, type) tuples.
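for illustration, a hypothetical two-parameter model gives (a sketch; note that the
case-insensitive sort interleaves control fields and parameters):
@code{.py}
Population.get_model_dtype({'angle': 30.0, 'dist': 2.5})
# [('_gen', 'i4'), ('_model', 'i4'), ('_particle', 'i4'),
#  ('_rfac', 'f4'), ('angle', 'f4'), ('dist', 'f4')]
@endcode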
"""
dt = []
for key in model_params:
dt.append((key, 'f4'))
dt.append(('_particle', 'i4'))
dt.append(('_gen', 'i4'))
dt.append(('_model', 'i4'))
dt.append(('_rfac', 'f4'))
dt.sort(key=lambda t: t[0].lower())
return dt
def setup(self, size, domain, history_file="", recalc_history=True):
"""
set up the population arrays seeded with previous results and the start model.
* set the population parameters and allocate the data arrays.
* set one particle to the initial guess, and the others to positions from a previous results file.
if the file contains fewer particles than allocated, the remaining particles are initialized randomly.
seeding from a history file can be used to continue an interrupted optimization process.
the method loads the results into the best and position arrays,
and updates the other arrays and variables
so that the population can be advanced and calculated.
by default, the calculations of the previous parameters are repeated.
this is recommended whenever the code, the experimental input, or the project arguments change
because all of them may have an influence on the R-factor.
re-calculation can be turned off by setting recalc_history to false.
this is recommended only if the calculation is a direct continuation of a previous one
without any changes to the code or input.
in that case, the previous results are marked as generation -1 with a negative model number.
upon the first iteration before running the scattering calculations,
new parameters will be derived by the swarm algorithm.
@param size: requested number of particles.
@param domain: definition of initial and limiting model parameters
expected by the cluster and parameters functions.
@arg domain.start: initial guess.
@arg domain.min: minimum values allowed.
@arg domain.max: maximum values allowed. if min == max, the parameter is kept constant.
@arg domain.step: initial velocity (difference between two steps) for particle swarm.
@param history_file: name of the results history file.
this can be a file created by the @ref save_array or @ref save_results methods.
the columns of the plain-text file contain model parameters and
the _rfac values of a previous calculation.
additional columns are ignored.
the first row must contain the column names.
if a parameter column is missing,
the corresponding parameter is seeded with a random value within the domain.
in this case, a warning is added to the log file.
the number of rows does not need to be equal to the population size.
if it is lower, the remaining particles are initialized randomly.
if it is higher, only the ones with the lowest R factors are used.
results with R >= 1.0 are ignored in any case.
@param recalc_history: select whether the R-factors of the historic models are calculated again.
this is useful if the historic data was calculated for a different cluster, different set of parameters,
or different experimental data, and if the R-factors of the new optimization may be systematically greater.
set this argument to False only if the calculation is a continuation of a previous one
without any changes to the code.
@return: None
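a minimal setup sketch, assuming a hypothetical one-parameter domain.
any object that exposes start, min, max and step dictionaries will do:
@code{.py}
domain = type('Domain', (), {})()
domain.start = {'dist': 2.5}
domain.min = {'dist': 2.0}
domain.max = {'dist': 3.0}
domain.step = {'dist': 0.1}
pop = Population()
pop.setup(size=10, domain=domain)
@endcode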
"""
self.size_req = size
self.model_start = domain.start
self.model_min = domain.min
self.model_max = domain.max
self.model_step = domain.step
# allocate arrays
dt = self.get_model_dtype(self.model_start)
self.pos = np.zeros(self.size_req, dtype=dt)
self.vel = np.zeros(self.size_req, dtype=dt)
self.results = np.empty((0), dtype=dt)
# randomize population
self.generation = 0
self.randomize()
self.pos['_particle'] = np.arange(self.size_req)
self.pos['_gen'] = self.generation
self.pos['_model'] = np.arange(self.size_req)
self.pos['_rfac'] = 2.1
self.model_count = self.size_req
# add previous results
if history_file:
hist = np.genfromtxt(history_file, names=True)
hist = hist[hist['_rfac'] < 1.0]
hist.sort(order='_rfac')
hist_size = min(hist.shape[0], self.size_req - 1)
discarded_fields = {'_particle', '_gen', '_model'}
source_fields = set(hist.dtype.names) - discarded_fields
dest_fields = set(self.pos.dtype.names) - discarded_fields
common_fields = source_fields & dest_fields
if len(common_fields) < len(dest_fields):
logger.warning(BMsg("missing columns in history file {hf} default to random seed value.",
hf=history_file))
for name in common_fields:
self.pos[name][0:hist_size] = hist[name][0:hist_size]
self.pos['_particle'] = np.arange(self.size_req)
logger.info(BMsg("seeding swarm population with {hs} models from history file {hf}.",
hs=hist_size, hf=history_file))
if recalc_history:
self.pos['_gen'] = self.generation
self.pos['_model'] = np.arange(self.size_req)
self.pos['_rfac'] = 2.1
logger.info("historic models will be re-calculated.")
else:
self.pos['_gen'][0:hist_size] = -1
self.pos['_model'][0:hist_size] = -np.arange(hist_size) - 1
self.model_count = self.size_req - hist_size
self.pos['_model'][hist_size:] = np.arange(self.model_count)
logger.info("historic models will not be re-calculated.")
# seed last particle with start parameters
self.seed(self.model_start, index=-1)
# initialize best array
self.best = self.pos.copy()
self._hold_once = True
def randomize(self, pos=True, vel=True):
"""
initializes a random population.
the position array is filled with random values (uniform distribution) from the parameter domain.
velocity values are randomly chosen between -1/8 and +1/8 times the width (max - min) of the parameter domain.
the method does not update the particle info fields.
@param pos: randomize positions. if False, the positions are not changed.
@param vel: randomize velocities. if False, the velocities are not changed.
"""
if pos:
for key in self.model_start:
self.pos[key] = ((self.model_max[key] - self.model_min[key]) *
np.random.random_sample(self.pos.shape) + self.model_min[key])
if vel:
for key in self.model_start:
self.vel[key] = ((self.model_max[key] - self.model_min[key]) *
(np.random.random_sample(self.pos.shape) - 0.5) / 4.0)
def seed(self, params, index=0):
"""
set one of the particles to the specified seed values.
the method does not update the particle info fields.
@param params: dictionary of model parameters.
the keys must match the ones of domain.start.
@param index: index of the particle that is seeded.
the index must be in the allowed range of the self.pos array.
0 is the first, -1 the last particle.
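a short usage sketch with a hypothetical 'dist' parameter.
since seed() does not touch the info fields, update them explicitly:
@code{.py}
pop.seed({'dist': 2.7}, index=-1)
pop.update_particle_info(pop.size_req - 1)
@endcode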
"""
for key in params:
self.pos[key][index] = params[key]
def update_particle_info(self, index, inc_model=True):
"""
set the internal particle info fields.
the fields @c _particle, @c _gen, and @c _model are updated with the current values.
@c _rfac is set to the default value 2.1.
this method must be called after each change of particle position.
@param index: (int) particle index.
@param inc_model: (bool) if True, increment the model count afterwards.
@return: None
"""
self.pos['_particle'][index] = index
self.pos['_gen'][index] = self.generation
self.pos['_model'][index] = self.model_count
self.pos['_rfac'][index] = 2.1
if inc_model:
self.model_count += 1
def advance_population(self):
"""
advance the population by one step.
this method just calls advance_particle() for each particle of the population.
if the population was just initialized by setup(), the method clears the internal hold flag
and returns without advancing the particles, so that the initial population is calculated first.
@return: None
"""
if not self._hold_once:
self.generation += 1
for index, __ in enumerate(self.pos):
self.advance_particle(index)
self._hold_once = False
def advance_particle(self, index):
"""
advance a particle by one step.
@param index: index of the particle in the population.
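per parameter key, the velocity and position follow the standard particle swarm
update rule implemented below. in this sketch, best_own stands for the particle's
own best position (xl) and best_friend for the best position of the sampled peers
(xg); pv, pl and pg are independent uniform random numbers in [0, 1):
@code{.py}
vel = (momentum * pv * vel
+ attract_local * pl * (best_own - pos)
+ attract_global * pg * (best_friend - pos))
pos += vel
@endcode
both values are then passed through the constrain_velocity() and
constrain_position() filters.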
"""
# note: the following two identifiers are views,
# assignment will modify the original array
pos = self.pos[index]
vel = self.vel[index]
# best fit that this individual has seen
xl = self.best[index]
# best fit that a group of others have seen
xg = self.best_friend(index)
for key in self.model_start:
# update velocity
dxl = xl[key] - pos[key]
dxg = xg[key] - pos[key]
pv = np.random.random()
pl = np.random.random()
pg = np.random.random()
vel[key] = (self.momentum * pv * vel[key] +
self.attract_local * pl * dxl +
self.attract_global * pg * dxg)
pos[key], vel[key], self.model_min[key], self.model_max[key] = \
self.constrain_velocity(pos[key], vel[key], self.model_min[key], self.model_max[key],
self.velocity_constrain_mode)
# update position
pos[key] += vel[key]
pos[key], vel[key], self.model_min[key], self.model_max[key] = \
self.constrain_position(pos[key], vel[key], self.model_min[key], self.model_max[key],
self.position_constrain_mode)
self.update_particle_info(index)
@staticmethod
def constrain_velocity(_pos, _vel, _min, _max, _mode='default'):
"""
constrain a velocity to the given bounds.
@param _pos: current position of the particle.
@param _vel: new velocity of the particle, i.e. distance to move.
@param _min: lower position boundary.
@param _max: upper position boundary.
_max must be greater or equal to _min.
@param _mode: what to do if a boundary constraint is violated.
reserved for future use. should be set to 'default'.
@return: tuple (new position, new velocity, new lower boundary, new upper boundary).
in the current implementation only the velocity may change.
however, in future versions any of these values may change.
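a worked example: with bounds [0.0, 1.0], the half-width is d = 0.5,
so a velocity of 0.9 is halved once to 0.45:
@code{.py}
pos, vel, lo, hi = Population.constrain_velocity(0.5, 0.9, 0.0, 1.0)
# pos == 0.5, vel == 0.45, lo == 0.0, hi == 1.0
@endcode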
"""
d = abs(_max - _min) / 2.0
if d > 0.0:
while abs(_vel) >= d:
_vel /= 2.0
else:
_vel = 0.0
return _pos, _vel, _min, _max
@staticmethod
def constrain_position(_pos, _vel, _min, _max, _mode='default'):
"""
constrain a position to the given bounds.
@param _pos: new position of the particle, possible out of bounds.
@param _vel: velocity of the particle, i.e. distance from the previous position.
_vel must be lower than _max - _min.
@param _min: lower boundary.
@param _max: upper boundary.
_max must be greater or equal to _min.
@param _mode: what to do if a boundary constraint is violated:
@arg 're-enter': re-enter from the opposite side of the parameter interval.
@arg 'bounce': fold the motion vector at the boundary and move the particle back into the domain.
@arg 'scatter': place the particle at a random place between its old position and the violated boundary.
@arg 'stick': place the particle at the violated boundary.
@arg 'expand': move the boundary so that the particle fits.
@arg 'random': place the particle at a random position between the lower and upper boundaries.
@arg 'default': the default mode is 'bounce'. this may change in future versions.
@return: tuple (new position, new velocity, new lower boundary, new upper boundary).
depending on the mode, any of these values may change.
the velocity is adjusted to be consistent with the change of position.
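a worked example for the default 'bounce' mode: an overshoot past the upper
boundary is folded back into the domain and the velocity is inverted:
@code{.py}
pos, vel, lo, hi = Population.constrain_position(1.2, 0.5, 0.0, 1.0, 'bounce')
# pos == 0.8, vel == -0.5, lo == 0.0, hi == 1.0
@endcode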
"""
_rng = max(_max - _min, 0.0)
_old = _pos - _vel
# prevent undershoot
if _vel > 0.0 and _pos < _min:
_pos = _min
_vel = _pos - _old
if _vel < 0.0 and _pos > _max:
_pos = _max
_vel = _pos - _old
assert abs(_vel) <= _rng, \
"velocity: pos = {0}, min = {1}, max = {2}, vel = {3}, _rng = {4}".format(_pos, _min, _max, _vel, _rng)
assert (_vel >= 0 and _pos >= _min) or (_vel <= 0 and _pos <= _max), \
"undershoot: pos = {0}, min = {1}, max = {2}, vel = {3}, _rng = {4}".format(_pos, _min, _max, _vel, _rng)
if _rng > 0.0:
while _pos > _max:
if _mode == 're-enter':
_pos -= _rng
elif _mode == 'bounce' or _mode == 'default':
_pos = _max - (_pos - _max)
_vel = -_vel
elif _mode == 'scatter':
_pos = _old + (_max - _old) * np.random.random()
_vel = _pos - _old
elif _mode == 'stick':
_pos = _max
_vel = _pos - _old
elif _mode == 'expand':
_max = _pos
elif _mode == 'random':
_pos = _min + _rng * np.random.random()
_vel = _pos - _old
else:
raise ValueError('invalid constrain mode')
while _pos < _min:
if _mode == 're-enter':
_pos += _rng
elif _mode == 'bounce' or _mode == 'default':
_pos = _min - (_pos - _min)
_vel = -_vel
elif _mode == 'scatter':
_pos = _old + (_min - _old) * np.random.random()
_vel = _pos - _old
elif _mode == 'stick':
_pos = _min
_vel = _pos - _old
elif _mode == 'expand':
_min = _pos
elif _mode == 'random':
_pos = _min + _rng * np.random.random()
_vel = _pos - _old
else:
raise ValueError('invalid constrain mode')
else:
_pos = _max
_vel = 0.0
return _pos, _vel, _min, _max
# noinspection PyUnusedLocal
def best_friend(self, index):
"""
select the best fit out of a random set of particles.
@param index: index of the calling particle. currently unused:
the random sample is drawn from the whole population and may include the calling particle itself.
@return the best-fit record (numpy.void) of the sampled set.
"""
friends = np.random.choice(self.best, self.friends, replace=False)
best_index = np.argmin(friends['_rfac'])
return friends[best_index]
def add_result(self, particle, rfac):
"""
add a calculation result to the results array, and update the best fit array.
@param particle: dictionary of model parameters and particle values.
the keys must correspond to the columns of the pos array,
i.e. the names of the model parameters plus the _rfac, _particle, and _model fields.
@param rfac: calculated R-factor.
the R-factor is written to the '_rfac' field.
@return better (bool): True if the new R-factor is better than the particle's previous best mark.
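a usage sketch, assuming a single hypothetical model parameter 'dist'.
the dictionary must provide every column of the results array:
@code{.py}
particle = {'dist': 2.5, '_particle': 0, '_gen': 1, '_model': 7, '_rfac': 2.1}
better = pop.add_result(particle, rfac=0.42)
@endcode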
"""
particle['_rfac'] = rfac
t = tuple(particle[n] for n in self.results.dtype.names)
a = np.asarray(t, dtype=self.results.dtype)
self.results = np.append(self.results, a)
index = particle['_particle']
better = particle['_rfac'] < self.best['_rfac'][index]
if better:
self.best[index] = a
return better
def is_converged(self, tol=0.01):
"""
check whether the population has converged.
convergence is reached when the R-factors of the N latest results
do not vary more than tol, where N is the size of the population.
@param tol: max. difference allowed between greatest and lowest value of the R factor in the population.
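for example, with a population of 3 particles and latest R-factors
0.512, 0.509 and 0.515, the spread is 0.006, which is below the
default tol of 0.01, so the population counts as converged.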
"""
nres = self.results.shape[0]
npop = self.pos.shape[0]
if nres >= npop:
rfac1 = np.min(self.results['_rfac'][-npop:])
rfac2 = np.max(self.results['_rfac'][-npop:])
converg = rfac2 - rfac1 < tol
return converg
else:
return False
def save_array(self, filename, array):
"""
save a population array to a text file.
the columns are space-delimited.
the first line contains the column names.
@param filename: name of destination file, optionally including a path.
@param array: population array to save.
must be one of self.pos, self.vel, self.best, self.results
"""
header = " ".join(self.results.dtype.names)
np.savetxt(filename, array, fmt='%g', header=header)
def load_array(self, filename, array):
"""
load a population array from a text file.
the array to load must be compatible with the current population
(same number of rows, same columns).
the first row must contain column names.
the ordering of columns may be different.
the returned array is ordered according to the array argument.
@param filename: name of source file, optionally including a path.
@param array: population array to load.
must be one of self.pos, self.vel, self.results.
@return array with loaded data.
this may be the same instance as on input.
@raise AssertionError if the shape of the loaded data does not match the shape of the given array.
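a sketch of a save/load round trip on the position array:
@code{.py}
pop.save_array("swarm.pos", pop.pos)
pop.pos = pop.load_array("swarm.pos", pop.pos)
@endcode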
"""
data = np.genfromtxt(filename, names=True)
assert data.shape == array.shape
for name in data.dtype.names:
array[name] = data[name]
return array
def save_population(self, base_filename):
"""
save the population array to a set of text files.
the file name extensions are .pos, .vel, and .best
"""
self.save_array(base_filename + ".pos", self.pos)
self.save_array(base_filename + ".vel", self.vel)
self.save_array(base_filename + ".best", self.best)
def load_population(self, base_filename):
"""
load the population array from a set of previously saved text files.
this can be used to continue an optimization job.
the file name extensions are .pos, .vel, and .best.
the files must have the same format as produced by save_population.
the files must have the same number of rows.
"""
self.pos = self.load_array(base_filename + ".pos", self.pos)
self.vel = self.load_array(base_filename + ".vel", self.vel)
self.best = self.load_array(base_filename + ".best", self.best)
def save_results(self, filename):
"""
save the complete list of calculation results.
"""
self.save_array(filename, self.results)
class ParticleSwarmHandler(handlers.ModelHandler):
"""
model handler which implements the particle swarm optimization algorithm.
"""
## @var _pop (Population)
# holds the population object.
## @var _pop_size (int)
# number of particles in the swarm.
## @var _outfile (file)
# output file for model parameters and R factor.
# the file is open during calculations.
# each calculation result adds one line.
## @var _model_time (timedelta)
# estimated CPU time to calculate one model.
# this value is the maximum time measured of the completed calculations.
# it is used to determine when the optimization should be finished so that the time limit is not exceeded.
## @var _converged (bool)
# indicates that the population has converged.
# convergence is detected by calling Population.is_converged().
# once convergence has been reached, this flag is set, and further convergence tests are skipped.
## @var _timeout (bool)
# indicates when the handler has run out of time,
# i.e. time is up before convergence has been reached.
# if _timeout is True, create_tasks() will not create further tasks,
# and add_result() will signal completion when the _pending_tasks queue becomes empty.
## @var _invalid_limit (int)
# maximum tolerated number of invalid calculations.
#
# if the number of invalid calculations (self._invalid_count) exceeds this limit,
# the optimization is aborted.
# the variable is initialized by self.setup() to 10 times the population size.
def __init__(self):
super(ParticleSwarmHandler, self).__init__()
self._pop = None
self._pop_size = 0
self._outfile = None
self._model_time = datetime.timedelta()
self._converged = False
self._timeout = False
self._invalid_limit = 10
def setup(self, project, slots):
"""
initialize the particle swarm and open an output file.
the population size is set to <code>max(project.pop_size, 4)</code> if project.pop_size is defined,
otherwise it defaults to <code>max(2 * slots, 4)</code>.
for good efficiency the population size (number of particles) should be
greater or equal to the number of available processing slots,
otherwise the next generation is created before all particles have been calculated
which may slow down convergence.
if calculations take a long time compared to the available computation time
or spawn a lot of sub-tasks due to complex symmetry,
and you prefer to allow for a good number of generations,
you should override the population size.
@param project: project instance.
@param slots: number of calculation processes available through MPI.
@return: None
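for example, with 12 calculation slots and no project.pop_size override,
the population is sized to <code>max(2 * 12, 4) = 24</code> particles.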
"""
super(ParticleSwarmHandler, self).setup(project, slots)
_min_size = 4
if project.pop_size:
self._pop_size = max(project.pop_size, _min_size)
else:
self._pop_size = max(self._slots * 2, _min_size)
self._pop = Population()
self._pop.setup(self._pop_size, self._project.create_domain(), self._project.history_file,
self._project.recalc_history)
self._invalid_limit = self._pop_size * 10
self._outfile = open(self._project.output_file + ".dat", "w")
self._outfile.write("# ")
self._outfile.write(" ".join(self._pop.results.dtype.names))
self._outfile.write("\n")
return None
def cleanup(self):
self._outfile.close()
super(ParticleSwarmHandler, self).cleanup()
def create_tasks(self, parent_task):
"""
develop the particle population and create a calculation task per particle.
this method advances the population by one step.
it generates one task for each particle whose model number is non-negative.
negative model numbers indicate that the particle is used for seeding
and does not need to be calculated in the first generation.
if the time limit is approaching, no new tasks are created.
the process loop calls this method every time the length of the task queue drops
below the number of calculation processes (slots).
this means in particular that a population will not be completely calculated
before the next generation starts.
for efficiency reasons, we do not wait until a population is complete.
this will cause a certain mixing of generations and slow down convergence
because the best peer position in the generation may not be known yet.
the effect can be reduced by making the population larger than the number of processes.
@return list of generated tasks. empty list if the optimization has converged (see Population.is_converged()).
"""
super(ParticleSwarmHandler, self).create_tasks(parent_task)
# this is the top-level handler, we expect just one parent: root.
parent_id = parent_task.id
assert parent_id == (-1, -1, -1, -1, -1)
self._parent_tasks[parent_id] = parent_task
time_pending = self._model_time * len(self._pending_tasks)
time_avail = (self.datetime_limit - datetime.datetime.now()) * max(self._slots, 1)
out_tasks = []
if not self._timeout and not self._converged:
self._pop.advance_population()
for pos in self._pop.pos_gen():
time_pending += self._model_time
if time_pending > time_avail:
self._timeout = True
logger.info("time limit reached")
break
if pos['_model'] >= 0:
new_task = parent_task.copy()
new_task.parent_id = parent_id
new_task.model = pos
new_task.change_id(model=pos['_model'])
child_id = new_task.id
self._pending_tasks[child_id] = new_task
out_tasks.append(new_task)
return out_tasks
def add_result(self, task):
"""
calculate the R factor of the result and add it to the results list of the population.
* save the current population.
* append the result to the result output file.
* update the execution time statistics.
* remove temporary files if requested.
* check whether the population has converged.
@return parent task (CalculationTask) if the optimization has converged, @c None otherwise.
"""
super(ParticleSwarmHandler, self).add_result(task)
self._complete_tasks[task.id] = task
del self._pending_tasks[task.id]
parent_task = self._parent_tasks[task.parent_id]
rfac = 1.0
if task.result_valid:
try:
rfac = self._project.calc_rfactor(task)
except ValueError:
task.result_valid = False
self._invalid_count += 1
logger.warning(BMsg("calculation of model {0} resulted in an undefined R-factor.", task.id.model))
task.model['_rfac'] = rfac
self._pop.add_result(task.model, rfac)
self._pop.save_population(self._project.output_file + ".pop")
if self._outfile:
s = (str(task.model[name]) for name in self._pop.results.dtype.names)
self._outfile.write(" ".join(s))
self._outfile.write("\n")
self._outfile.flush()
self._project.files.update_model_rfac(task.id.model, rfac)
self._project.files.set_model_complete(task.id.model, True)
if task.result_valid:
if self._pop.is_converged() and not self._converged:
logger.info("population converged")
self._converged = True
if task.time > self._model_time:
self._model_time = task.time
else:
if self._invalid_count >= self._invalid_limit:
logger.error("number of invalid calculations (%u) exceeds limit", self._invalid_count)
self._converged = True
# optimization complete?
if (self._timeout or self._converged) and len(self._pending_tasks) == 0:
del self._parent_tasks[parent_task.id]
else:
parent_task = None
self.cleanup_files(keep=self._pop_size)
return parent_task