update public distribution

based on internal repository c9a2ac8 2019-01-03 16:04:57 +0100
tagged rev-master-2.0.0
This commit is contained in:
2019-01-31 15:45:02 +01:00
parent bbd16d0f94
commit acea809e4e
92 changed files with 165828 additions and 143181 deletions

View File

308
pmsco/optimizers/genetic.py Normal file
View File

@ -0,0 +1,308 @@
"""
@package pmsco.optimizers.genetic
genetic optimization algorithm.
this module implements a genetic algorithm for structural optimization.
the genetic algorithm is adapted from
D. A. Duncan et al., Surface Science 606, 278 (2012)
the genetic algorithm evolves a population of individuals
by a combination of inheritance, crossover and mutation
and R-factor based selection.
@author Matthias Muntwiler, matthias.muntwiler@psi.ch
@copyright (c) 2018 by Paul Scherrer Institut @n
Licensed under the Apache License, Version 2.0 (the "License"); @n
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import logging
import numpy as np
import random
import pmsco.optimizers.population as population
from pmsco.helpers import BraceMessage as BMsg
logger = logging.getLogger(__name__)
class GeneticPopulation(population.Population):
    """
    population that is evolved by a genetic optimization algorithm.

    the algorithm rests on the following principles:

    1. inheritance: two parents of the old generation pass their genes (i.e. model parameters)
       on to two children of the new generation.
    2. elitism: the population is ranked by R-factor,
       and an individual preferably mates with one that is close to it in that ranking.
    3. crossover: each gene of the parents is handed at random to one child or the other.
    4. mutation: a gene may change at random.
    5. selection: the best individual found so far re-enters the parent population
       in place of the worst one.

    the main tuning parameter is mutation_step, which is taken from domain.step.
    it is the width of the gaussian distribution of changes applied by a weak mutation.
    it should be large enough that the whole parameter space can be probed,
    yet small enough that a frequent mutation does not throw an individual out of the convergence region.
    typically, the step should be of the order of the parameter range divided by the population size.

    further tunable parameters are mating_factor, weak_mutation_probability and strong_mutation_probability.
    their defaults should normally be fine.
    """

    ## @var weak_mutation_probability
    #
    # probability (between 0 and 1) that a parameter changes in the mutate_weak() method.
    #
    # the default is 1.0, i.e., each parameter mutates in each generation.
    #
    # 1.0 has shown better coverage of the continuous parameter space and faster finding of the optimum.

    ## @var strong_mutation_probability
    #
    # probability (between 0 and 1) that a parameter changes in the mutate_strong() method.
    #
    # the default is 0.01, i.e., on average, every hundredth probed parameter is affected by a strong mutation.
    # if the model contains 10 parameters, for example,
    # every tenth particle would see a mutation of at least one of its parameters.
    #
    # a value that is too high may disturb convergence,
    # a value that is too low may trap the algorithm in a local optimum.

    ## @var mating_factor
    #
    # inverse width of the mating preference distribution.
    #
    # the greater this value, the more similar partners are mated by the mate_parents() method.
    #
    # the default value is 4.0.
    # (original author's note, not re-verified here: this results in a probability of about 0.0025
    # that the best particle mates the worst.)

    ## @var position_constrain_mode
    #
    # the position constrain mode selects what to do if a particle violates the parameter limits.
    #
    # the default is "random" which resets the parameter to a random value.

    ## @var mutation_step
    #
    # standard deviations of the gaussian distribution used in the mutate_weak() method.
    # the variable is a dictionary with the same keys as model_step (the parameter domain).
    #
    # it is initialized from the domain.step
    # or set to a default value based on the parameter range and population size.

    def __init__(self):
        """
        create the population object with default tuning parameters.
        """
        super(GeneticPopulation, self).__init__()

        self.mutation_step = {}
        self.position_constrain_mode = 'random'
        self.mating_factor = 4.
        self.strong_mutation_probability = 0.01
        self.weak_mutation_probability = 1.0

    def setup(self, size, domain, **kwargs):
        """
        @copydoc Population.setup()

        on top of the inherited behaviour, self.mutation_step is filled in.
        for each parameter, the mutation step is the domain.step if that is non-zero,
        else the parameter range divided by the population size.
        """
        super(GeneticPopulation, self).setup(size, domain, **kwargs)

        for key in self.model_step:
            step = self.model_step[key]
            if step != 0:
                self.mutation_step[key] = step
            else:
                self.mutation_step[key] = (self.model_max[key] - self.model_min[key]) / size

    def randomize(self, pos=True, vel=True):
        """
        initialize a "random" population.

        the distribution is not completely random:
        for each parameter, a linear ramp that spans the parameter domain is generated
        and then permuted randomly.
        velocities are generated likewise from a ramp spanning +/- one eighth of the parameter range.

        the particle info fields are not touched by this method.

        @param pos: randomize positions. if False, the positions are not changed.
        @param vel: randomize velocities. if False, the velocities are not changed.
        """
        if pos:
            for key in self.model_start:
                ramp = np.linspace(self.model_min[key], self.model_max[key], self.pos.shape[0])
                self.pos[key] = np.random.permutation(ramp)

        if vel:
            for key in self.model_start:
                limit = (self.model_max[key] - self.model_min[key]) / 8
                ramp = np.linspace(-limit, limit, self.vel.shape[0])
                self.vel[key] = np.random.permutation(ramp)

    def advance_population(self):
        """
        advance the population by one generation.

        unless self._hold_once is set, the generation counter is incremented
        and the population is advanced in these steps:

        1. replace the worst individual by the best found so far.
        2. mate the parents in pairs of two.
        3. produce children by crossover from the parents.
        4. apply weak mutations.
        5. apply strong mutations.
        6. mutate children that duplicate a known model, and constrain them to the domain.

        in any case, the inherited advance_population() is called at the end.

        @return: None
        """
        # NOTE(review): the indentation of this method was reconstructed;
        # all evolution steps are assumed to be guarded by _hold_once - confirm against the original file.
        if not self._hold_once:
            self.generation += 1

            ranked = self.pos.copy()
            ranked.sort(order='_rfac')
            champions = self.best.copy()
            champions.sort(order='_rfac')
            # the all-time best model takes over the slot of the currently worst individual
            if champions[0]['_model'] not in ranked['_model']:
                champions[0]['_particle'] = ranked[-1]['_particle']
                ranked[-1] = champions[0]
                ranked.sort(order='_rfac')

            children = []
            for mother, father in self.mate_parents(ranked):
                children.extend(self.crossover(mother, father))

            for child in children:
                slot = child['_particle']
                self.mutate_weak(child, self.weak_mutation_probability)
                self.mutate_strong(child, self.strong_mutation_probability)
                self.mutate_duplicate(child)

                for key in self.model_start:
                    delta = child[key] - self.pos[slot][key]
                    constrained = self.constrain_position(child[key], delta,
                                                          self.model_min[key], self.model_max[key],
                                                          self.position_constrain_mode)
                    child[key], delta, self.model_min[key], self.model_max[key] = constrained

                self.pos[slot] = child
                self.update_particle_info(slot)

        super(GeneticPopulation, self).advance_population()

    def mate_parents(self, positions):
        """
        group the population in pairs of two.

        the first individual of the remaining population picks a partner among the following ones.
        the pick is drawn from an exponential distribution so that earlier (better ranked) ones are preferred.
        this is repeated until fewer than two individuals remain.

        @param positions: original population (numpy structured array).
            the population should be ordered with the best model first.

        @return: sequence of pairs (tuples) of structured arrays holding one model each.
        """
        remaining = list(positions)
        pairs = []
        while len(remaining) >= 2:
            first = remaining.pop(0)
            count = len(remaining)
            pick = int(random.expovariate(self.mating_factor / count) * count)
            # clip the draw to the last remaining candidate
            second = remaining.pop(min(pick, count - 1))
            pairs.append((first, second))
        return pairs

    def crossover(self, parent1, parent2):
        """
        create two children from two parents by crossover.

        the children start as copies of their respective parent.
        then, for each model parameter, the values are swapped between the children with a probability of one half.

        @param parent1: numpy structured array holding the model of the first parent.
        @param parent2: numpy structured array holding the model of the second parent.

        @return: tuple of the two crossed children.
            these are copies; the parents are not modified.
        """
        child1 = parent1.copy()
        child2 = parent2.copy()
        for key in self.model_start:
            if random.random() < 0.5:
                continue
            child1[key] = parent2[key]
            child2[key] = parent1[key]
        return child1, child2

    def mutate_weak(self, model, probability):
        """
        apply a weak mutation to a model.

        at the given probability, a gaussian-distributed random number
        with a standard deviation of mutation_step is added to each parameter.

        @param[in,out] model: structured numpy.ndarray holding the model parameters.
            model is modified in place.
        @param probability: probability between 0 and 1 at which to change a parameter.
            0 = no change, 1 = force change.

        @return: model (same instance as the @c model input argument).
        """
        for key in self.model_start:
            if random.random() >= probability:
                continue
            model[key] += random.gauss(0, self.mutation_step[key])
        return model

    def mutate_strong(self, model, probability):
        """
        apply a strong mutation to a model.

        at the given probability, each parameter is replaced by a uniformly distributed random value
        between model_min and model_max.

        @param[in,out] model: structured numpy.ndarray holding the model parameters.
            model is modified in place.
        @param probability: probability between 0 and 1 at which to change a parameter.
            0 = no change, 1 = force change.

        @return: model (same instance as the @c model input argument).
        """
        for key in self.model_start:
            if random.random() >= probability:
                continue
            span = self.model_max[key] - self.model_min[key]
            model[key] = span * random.random() + self.model_min[key]
        return model

    def mutate_duplicate(self, model):
        """
        mutate a model if it is identical to a previously calculated one.

        if find_model() succeeds, the mutate_weak() mutation is applied with probability 1.
        a ValueError raised in the process leaves the model as it is at that point.

        @param[in,out] model: structured numpy.ndarray holding the model parameters.
            model is modified in place.

        @return: model (same instance as the @c model input argument).
        """
        try:
            self.find_model(model)
            self.mutate_weak(model, 1.0)
        except ValueError:
            # presumably find_model() signals "not found" this way - nothing to do then.
            pass
        return model
class GeneticOptimizationHandler(population.PopulationHandler):
    """
    model handler that optimizes by means of the genetic algorithm.

    the handler is a PopulationHandler that operates on a GeneticPopulation.
    """

    def __init__(self):
        """
        initialize the handler and install a GeneticPopulation as its population.
        """
        super(GeneticOptimizationHandler, self).__init__()

        self._pop = GeneticPopulation()

View File

@ -0,0 +1,280 @@
"""
gradient optimization module for MSC calculations
the module starts multiple MSC calculations and optimizes the model parameters
with a gradient search.
the optimization task is distributed over multiple processes using MPI.
the optimization must be started with N+1 processes in the MPI environment,
where N equals the number of fit parameters.
IMPLEMENTATION IN PROGRESS - DEBUGGING
Requires: scipy, numpy
Author: Matthias Muntwiler
Copyright (c) 2015 by Paul Scherrer Institut
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
"""
import numpy as np
import scipy.optimize as so
import data as md
from mpi4py import MPI
# MPI message tags.
#
# tags are interpreted per direction:
# the master only receives slave-to-master tags and vice versa.
# within one direction, every tag must be unique.

# messages sent from master to slaves

# master sends new assignment
# the message is a dictionary of model parameters
TAG_NEW_TASK = 1
# master calls end of calculation
# the message is empty
TAG_FINISH = 2
# master sends current population
# currently not used
# (must differ from TAG_FINISH, else a slave would take a population message for a finish request)
TAG_POPULATION = 3

# messages sent from slaves to master

# slave reports new result
# the message is a dictionary of model parameters and results
TAG_NEW_RESULT = 1
# slave confirms end of calculation
# currently not used
TAG_FINISHED = 2
class MscProcess(object):
    """
    Code shared by MscMaster and MscSlave.

    The object holds the MPI communicator, the project and simple run-state flags.
    """

    def __init__(self, comm):
        """
        comm: MPI communicator. the code in this class reads its rank attribute.
        """
        self.comm = comm
        # the state attributes are defined here so that they exist even before setup() is called.
        self.project = None
        self.running = False
        self.finishing = False
        self.iteration = 0

    def setup(self, project):
        """
        Assigns the project and resets the run state and the iteration counter.

        project: project object. calc() calls its create_cluster, create_params and run_calc methods
            and reads its output_file, scan_file and scan_modf attributes.
        """
        self.project = project
        self.running = False
        self.finishing = False
        self.iteration = 0

    def run(self):
        """
        Main loop of the process. Does nothing in the base class.
        """
        pass

    def cleanup(self):
        """
        Releases resources at the end of the job. Does nothing in the base class.
        """
        pass

    def calc(self, pars):
        """
        Executes a single MSC calculation.

        pars: A dictionary of parameters expected by the cluster and parameters functions.
            The dictionary is modified in place.

        returns: pars with three additional values:
            rank: rank of the calculation process
            iter: iteration index of the calculation process
            rfac: resulting R-factor
        all other calculation results are discarded.
        """
        # create parameter and cluster structures
        clu = self.project.create_cluster(pars)
        par = self.project.create_params(pars)

        # generate file names
        base_filename = "%s_%u_%u" % (self.project.output_file, self.comm.rank, self.iteration)

        # call the msc program
        result_etpi = self.project.run_calc(par, clu, self.project.scan_file, base_filename, delete_files=True)

        # calculate modulation function and R-factor
        result_etpi = md.calc_modfunc_lowess(result_etpi)
        result_r = md.rfactor(self.project.scan_modf, result_etpi)

        pars['rank'] = self.comm.rank
        pars['iter'] = self.iteration
        pars['rfac'] = result_r

        return pars
class MscMaster(MscProcess):
    """
    Master process of the gradient optimization.

    The master runs the minimizer, calculates the R-factor at the current position itself,
    and delegates one finite-difference calculation per fit parameter to the slave processes.
    """

    def __init__(self, comm):
        """
        comm: MPI communicator. All ranks except the master's own are counted as slaves.
        """
        super(MscMaster, self).__init__(comm)
        self.slaves = self.comm.Get_size() - 1
        self.running_slaves = 0
        self.dom = None
        self._outfile = None
        self._outfile_keys = []

    def setup(self, project):
        """
        Creates the parameter domain and opens the output file (output_file + ".dat").

        The first line of the output file is a comment that lists the column names:
        the model parameters followed by 'rfac'.
        """
        super(MscMaster, self).setup(project)
        self.dom = project.create_domain()
        self.running_slaves = self.slaves

        self._outfile = open(self.project.output_file + ".dat", "w")
        self._outfile.write("#")
        # keys() must be copied into a list: a Python 3 dictionary view has no append method.
        self._outfile_keys = list(self.dom.start.keys())
        self._outfile_keys.append('rfac')
        for name in self._outfile_keys:
            self._outfile.write(" " + name)
        self._outfile.write("\n")

    def run(self):
        """
        starts the minimization

        Parameters whose upper limit is greater than the lower limit are fitted by L-BFGS-B
        within these limits. The other parameters are held at the upper limit value.
        The fit result is appended to the output file.
        """
        # pack initial guess, bounds, constant parameters
        params_index = {}
        const_params = self.dom.max.copy()
        start_values = []
        bounds = []
        for key in self.dom.start:
            if self.dom.max[key] > self.dom.min[key]:
                params_index[key] = len(start_values)
                start_values.append(self.dom.start[key])
                bounds.append((self.dom.min[key], self.dom.max[key]))
        fit_params = np.array(start_values, dtype=float)

        # _minfunc returns the function value and the gradient, hence jac=True
        fit_result = so.minimize(self._minfunc, fit_params,
                                 args=(params_index, const_params),
                                 method='L-BFGS-B', jac=True, bounds=bounds)

        msc_result = const_params.copy()
        for key, index in params_index.items():
            msc_result[key] = fit_result.x[index]
        msc_result['rfac'] = fit_result.fun

        self._outfile.write("# result of gradient optimization\n")
        self._outfile.write("# success = {0}, iterations = {1}, calculations = {2}\n".format(
            fit_result.success, fit_result.nit, fit_result.nfev))
        self._outfile.write("# message: {0}\n".format(fit_result.message))
        for name in self._outfile_keys:
            self._outfile.write(" " + str(msc_result[name]))
        self._outfile.write("\n")

    def _minfunc(self, fit_params, params_index, const_params):
        """
        function to be minimized

        fit_params (numpy.ndarray): current fit position

        params_index (dict): dictionary of fit parameters
            and their index in fit_params.
            key=MSC parameter name, value=index to fit_params.

        const_params (dict): dictionary of MSC parameters
            holding (at least) the constant parameter values.
            a copy of this instance, updated with the current fit position,
            is passed to MSC.

        returns:
            (float) R-factor at the current fit position
            (numpy.ndarray) approximate gradient, ordered like fit_params
        """
        # unpack parameters
        msc_params = const_params.copy()
        for key, index in params_index.items():
            msc_params[key] = fit_params[index]

        # run MSC calculations
        rfac, jac_dict = self.run_msc_calcs(msc_params, params_index)

        # pack jacobian
        jac_arr = np.zeros_like(fit_params)
        for key, index in params_index.items():
            jac_arr[index] = jac_dict[key]

        return rfac, jac_arr

    def run_msc_calcs(self, params, params_index):
        """
        params: dictionary of actual parameters

        params_index: dictionary of fit parameter indices.
            only the keys are used here
            to decide for which parameters the derivative is calculated.

        returns:
            (float) R-factor at the params location
            (dict) approximate gradient at the params location

        raises: RuntimeError if there are fewer slave processes than fit parameters.
        """
        # each derivative is calculated by a separate slave, starting at rank 1.
        if len(params_index) > self.slaves:
            raise RuntimeError(
                "gradient optimization of {0} parameters requires {0} slave processes, "
                "but only {1} are available".format(len(params_index), self.slaves))

        # distribute tasks for gradient
        slave_rank = 1
        for key in params_index:
            params2 = params.copy()
            params2[key] += self.dom.step[key]
            # the slave returns this entry so that the result can be assigned to its parameter
            params2['key'] = key
            self.comm.send(params2, dest=slave_rank, tag=TAG_NEW_TASK)
            slave_rank += 1

        # run calculation for actual position
        result0 = self.calc(params)
        for name in self._outfile_keys:
            self._outfile.write(" " + str(result0[name]))
        self._outfile.write("\n")

        # gather results
        s = MPI.Status()
        jacobian = params.copy()
        # one message is expected per task sent. messages with another tag are received but not evaluated.
        for _ in range(1, slave_rank):
            result1 = self.comm.recv(source=MPI.ANY_SOURCE, tag=MPI.ANY_TAG, status=s)
            if s.tag == TAG_NEW_RESULT:
                key = result1['key']
                # forward difference quotient
                jacobian[key] = (result1['rfac'] - result0['rfac']) / (result1[key] - result0[key])
                for name in self._outfile_keys:
                    self._outfile.write(" " + str(result1[name]))
                self._outfile.write("\n")

        self._outfile.flush()

        return result0['rfac'], jacobian

    def cleanup(self):
        """
        cleanup: close output file, terminate slave processes
        """
        # the file is not open if setup() has not been called.
        if self._outfile is not None:
            self._outfile.close()
        for rank in range(1, self.running_slaves + 1):
            self.comm.send(None, dest=rank, tag=TAG_FINISH)
        super(MscMaster, self).cleanup()
class MscSlave(MscProcess):
    """
    Slave process: carries out the calculations assigned by the master (rank 0).
    """

    def run(self):
        """
        Receives messages from the master until the master calls the end of the calculation.

        A TAG_NEW_TASK message is passed on to accept_task.
        A TAG_FINISH message ends the loop.
        Messages with other tags are dropped.
        """
        status = MPI.Status()
        self.running = True
        while self.running:
            message = self.comm.recv(source=0, tag=MPI.ANY_TAG, status=status)
            if status.tag == TAG_FINISH:
                self.running = False
            elif status.tag == TAG_NEW_TASK:
                self.accept_task(message)

    def accept_task(self, pars):
        """
        Runs the calculation for the given parameters, sends the result to the master,
        and increments the iteration counter.
        """
        self.comm.send(self.calc(pars), dest=0, tag=TAG_NEW_RESULT)
        self.iteration += 1
def optimize(project):
    """
    main entry point for optimization

    rank 0: starts the calculation, distributes tasks
    ranks 1...N-1: work on assignments from rank 0

    cleanup() is called even if run() raises an exception.
    on rank 0 this closes the output file and sends the finish message to the slaves,
    which would otherwise keep waiting for messages.
    """
    mpi_comm = MPI.COMM_WORLD
    mpi_rank = mpi_comm.Get_rank()

    if mpi_rank == 0:
        process = MscMaster(mpi_comm)
    else:
        process = MscSlave(mpi_comm)

    process.setup(project)
    try:
        process.run()
    finally:
        process.cleanup()

421
pmsco/optimizers/grid.py Normal file
View File

@ -0,0 +1,421 @@
"""
@package pmsco.optimizers.grid
grid search optimization handler.
the module starts multiple MSC calculations and varies parameters on a fixed coordinate grid.
@author Matthias Muntwiler, matthias.muntwiler@psi.ch
@copyright (c) 2015 by Paul Scherrer Institut @n
Licensed under the Apache License, Version 2.0 (the "License"); @n
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import datetime
import math
import numpy as np
import logging
from pmsco.compat import open
import pmsco.handlers as handlers
import pmsco.graphics as graphics
from pmsco.helpers import BraceMessage as BMsg
logger = logging.getLogger(__name__)
class GridPopulation(object):
    """
    grid population.

    the object holds the coordinates of all grid points and the R-factors calculated for them.
    """

    ## @var model_start
    # (dict) initial model parameters.
    # read-only. call setup() to change this attribute.

    ## @var model_min
    # (dict) low limits of the model parameters.
    # read-only. call setup() to change this attribute.

    ## @var model_max
    # (dict) high limits of the model parameters.
    # if min == max, the parameter is kept constant.
    # read-only. call setup() to change this attribute.

    ## @var model_step
    # (dict) step size (distance between two grid points) of the model parameters.
    # read-only. call setup() to change this attribute.

    ## @var model_count
    # number of models (grid points).
    # initial value = 0.

    ## @var search_keys
    # (list) names of the parameters that are varied on the grid.

    ## @var fixed_keys
    # (list) names of the parameters that are kept constant at their start value.

    ## @var positions
    # (numpy.ndarray) flat list of grid coordinates and results.
    #
    # the column names include the names of the model parameters, taken from domain.start,
    # and the special names @c '_model', @c '_rfac'.
    # the special fields have the following meanings:
    #
    #   * @c '_model': model number.
    #       the model number identifies the grid point.
    #       the field is used to associate the result of a calculation with the coordinate vector.
    #       the model handlers use it to derive their model ID.
    #
    #   * @c '_rfac': calculated R-factor for this position.
    #       it is set by the add_result() method.
    #
    # @note if you read a single element, e.g. pos[0], from the array, you will get a numpy.void object.
    #       this object is a <em>view</em> of the original array item

    def __init__(self):
        """
        initialize the population object.
        """
        self.model_start = {}
        self.model_min = {}
        self.model_max = {}
        self.model_step = {}

        self.model_count = 0

        self.positions = None

        self.search_keys = []
        self.fixed_keys = []

    @staticmethod
    def get_model_dtype(model_params):
        """
        get numpy array data type for model parameters and grid control variables.

        @param model_params: dictionary of model parameters or list of parameter names.

        @return: dtype for use with numpy array constructors.
            this is a list of (name, type) tuples, sorted by lower-case name.
            the parameters are 'f4', the additional fields are '_model' ('i4') and '_rfac' ('f4').
        """
        dt = [(key, 'f4') for key in model_params]
        dt.append(('_model', 'i4'))
        dt.append(('_rfac', 'f4'))
        dt.sort(key=lambda t: t[0].lower())
        return dt

    def setup(self, domain):
        """
        set up the population and result arrays.

        @param domain: definition of initial and limiting model parameters
            expected by the cluster and parameters functions.
            the attributes have the following meanings:
            @arg start: values of the fixed parameters.
            @arg min: minimum values allowed.
            @arg max: maximum values allowed.
                if abs(max - min) < step/2 , the parameter is kept constant.
            @arg step: step size (distance between two grid points).
                if step <= 0, the parameter is kept constant.

        if all parameters are constant, the grid consists of one point at the start values.
        """
        self.model_start = domain.start
        self.model_min = domain.min
        self.model_max = domain.max
        self.model_step = domain.step

        self.search_keys = []
        self.fixed_keys = []
        scales = []

        for p in domain.step:
            if domain.step[p] > 0:
                # np.linspace requires an integer number of points
                n = int(np.round((domain.max[p] - domain.min[p]) / domain.step[p])) + 1
            else:
                n = 1
            if n > 1:
                self.search_keys.append(p)
                scales.append(np.linspace(domain.min[p], domain.max[p], n))
            else:
                self.fixed_keys.append(p)

        # scales is a list of 1D arrays that hold the coordinates of the individual dimensions
        # positions_nd is a list of N-D arrays that hold the coordinates in all dimensions
        # positions_flat is a list of 1D arrays that hold the coordinates in flat sequence
        if len(scales) > 1:
            positions_nd = np.meshgrid(*scales, indexing='ij')
            positions_flat = [arr.flatten() for arr in positions_nd]
        else:
            positions_flat = scales

        if positions_flat:
            self.model_count = positions_flat[0].shape[0]
        else:
            # no parameter is varied: the grid degenerates to a single point
            self.model_count = 1

        # shuffle the calculation order so that we may see the more interesting parts earlier
        shuffle_index = np.arange(self.model_count)
        np.random.shuffle(shuffle_index)
        positions_reordered = [pos[shuffle_index] for pos in positions_flat]

        dt = self.get_model_dtype(self.model_min)

        # positions
        self.positions = np.zeros(self.model_count, dtype=dt)

        for idx, key in enumerate(self.search_keys):
            self.positions[key] = positions_reordered[idx]
        for key in self.fixed_keys:
            self.positions[key] = self.model_start[key]

        self.positions['_model'] = np.arange(self.model_count)
        # initial R-factor of grid points that have not been calculated yet
        self.positions['_rfac'] = 2.1

    def add_result(self, particle, rfac):
        """
        store the R-factor of a calculated grid point in the positions array.

        @param particle: dictionary of model parameters and particle values.
            only the '_model' item is read. it selects the row of the positions array.

        @param rfac: calculated R-factor.
            the R-factor is written to the '_rfac' field.

        @return None
        """
        model = particle['_model']
        self.positions['_rfac'][model] = rfac

    def save_array(self, filename, array):
        """
        saves a population array to a text file.

        the header line contains the column names of self.positions.

        @param filename: name of the file to write.

        @param array: population array to save, e.g. self.positions.
        """
        header = " ".join(self.positions.dtype.names)
        np.savetxt(filename, array, fmt='%g', header=header)

    def load_array(self, filename, array):
        """
        load a population array from a text file.

        the array to load must be compatible with the current population
        (same number of rows, same columns).
        the first row must contain column names.
        the ordering of columns may be different.
        the returned array is ordered according to the array argument.

        @param filename: name of the file to read.

        @param array: population array to load, e.g. self.positions.
            the array is overwritten in place.

        @return array with loaded data.
            this is the same instance as on input.

        @raise AssertionError if the shapes (numbers of rows) of the file data and the array differ.
        """
        data = np.genfromtxt(filename, names=True)
        assert data.shape == array.shape
        for name in data.dtype.names:
            array[name] = data[name]
        return array

    def save_population(self, base_filename):
        """
        saves the positions array to a text file.

        the file name is base_filename + ".pos".
        """
        self.save_array(base_filename + ".pos", self.positions)

    def load_population(self, base_filename):
        """
        loads the positions array from a previously saved text file.

        this can be used to continue an optimization job.

        the file name is base_filename + ".pos".
        the file must have the same format as produced by save_population
        and the same number of rows as the current positions array.
        """
        self.load_array(base_filename + ".pos", self.positions)

    def save_results(self, filename):
        """
        saves the complete list of calculation results.
        """
        self.save_array(filename, self.positions)
class GridSearchHandler(handlers.ModelHandler):
    """
    model handler which implements the grid search algorithm.
    """

    ## @var _pop (GridPopulation)
    # holds the population object.

    ## @var _outfile (file)
    # output file for model parameters and R factor.
    # the file is open during calculations.
    # each calculation result adds one line.

    ## @var _model_time (timedelta)
    # estimated CPU time to calculate one model.
    # this value is the maximum time measured of the completed calculations.
    # it is used to determine when the optimization should be finished so that the time limit is not exceeded.

    ## @var _timeout (bool)
    # indicates when the handler has run out of time,
    # i.e. time is up before all grid points have been calculated.
    # if _timeout is True, create_tasks() will not create further tasks,
    # and add_result() will signal completion when the _pending_tasks queue becomes empty.

    ## @var _invalid_limit (int)
    # create_tasks() stops creating tasks when the count of invalid results reaches this limit.
    # setup() raises the limit to the number of slots if that is greater.

    ## @var _next_model (int)
    # index (into the positions array) of the next grid point to be dispatched by create_tasks().

    def __init__(self):
        super(GridSearchHandler, self).__init__()
        self._pop = None
        self._outfile = None
        self._model_time = datetime.timedelta()
        self._timeout = False
        self._invalid_limit = 10
        self._next_model = 0

    def setup(self, project, slots):
        """
        create the grid population and open the output file.

        the output file is named project.output_file + ".dat".
        its first line is a comment that lists the column names of the positions array.

        @param project: project object. its create_domain() method defines the grid.

        @param slots: number of calculation processes available through MPI.

        @return: None
        """
        super(GridSearchHandler, self).setup(project, slots)

        self._pop = GridPopulation()
        self._pop.setup(self._project.create_domain())
        self._invalid_limit = max(slots, self._invalid_limit)

        self._outfile = open(self._project.output_file + ".dat", "w")
        self._outfile.write("# ")
        self._outfile.write(" ".join(self._pop.positions.dtype.names))
        self._outfile.write("\n")

        return None

    def cleanup(self):
        """
        close the output file (if it is open) and call the inherited cleanup.
        """
        # _outfile is None if setup() has not been called; add_result() guards it in the same way.
        if self._outfile:
            self._outfile.close()
            self._outfile = None
        super(GridSearchHandler, self).cleanup()

    def create_tasks(self, parent_task):
        """
        create the calculation task for the next grid point.

        each call generates at most one task.
        no task is generated if

        * the estimated time of the pending tasks plus one more exceeds the available time
          (this condition is latched in _timeout),
        * all grid points have been dispatched, or
        * the number of invalid results has reached _invalid_limit.

        @param parent_task: (CalculationTask) root task. the new task is a modified copy of it.

        @return list of generated tasks. empty list if no further task is to be calculated.
        """
        super(GridSearchHandler, self).create_tasks(parent_task)

        # this is the top-level handler, we expect just one parent: root.
        parent_id = parent_task.id
        assert parent_id == (-1, -1, -1, -1, -1)
        self._parent_tasks[parent_id] = parent_task

        # time needed for the pending tasks plus the one to be created
        time_pending = self._model_time * len(self._pending_tasks)
        time_pending += self._model_time
        time_avail = (self.datetime_limit - datetime.datetime.now()) * max(self._slots, 1)
        if time_pending > time_avail:
            self._timeout = True

        out_tasks = []
        model = self._next_model
        if not self._timeout and model < self._pop.model_count and self._invalid_count < self._invalid_limit:
            new_task = parent_task.copy()
            new_task.parent_id = parent_id
            pos = self._pop.positions[model]
            new_task.model = {k: pos[k] for k in pos.dtype.names}
            new_task.change_id(model=model)

            child_id = new_task.id
            self._pending_tasks[child_id] = new_task
            out_tasks.append(new_task)
            self._next_model += 1

        return out_tasks

    def add_result(self, task):
        """
        store the R factor of a calculated task in the positions array.

        * append the result to the result output file.
        * update the execution time statistics.
        * remove temporary files if requested.
        * check whether the grid search is complete.

        an invalid result is not stored. it increments the count of invalid results.

        @param task: (CalculationTask) calculated task.

        @return parent task (CalculationTask) if the search is complete, @c None otherwise.
        """
        super(GridSearchHandler, self).add_result(task)

        self._complete_tasks[task.id] = task
        del self._pending_tasks[task.id]
        parent_task = self._parent_tasks[task.parent_id]

        # NOTE(review): the indentation of this method was reconstructed;
        # it is assumed that the output and file tracker updates apply to valid results only - confirm.
        if task.result_valid:
            assert not math.isnan(task.rfac)
            task.model['_rfac'] = task.rfac
            self._pop.add_result(task.model, task.rfac)

            if self._outfile:
                s = (str(task.model[name]) for name in self._pop.positions.dtype.names)
                self._outfile.write(" ".join(s))
                self._outfile.write("\n")
                self._outfile.flush()

            self._project.files.update_model_rfac(task.id.model, task.rfac)
            self._project.files.set_model_complete(task.id.model, True)

            # the longest calculation time seen so far serves as time estimate per model
            if task.time > self._model_time:
                self._model_time = task.time
        else:
            self._invalid_count += 1

        # grid search complete?
        if len(self._pending_tasks) == 0:
            del self._parent_tasks[parent_task.id]
        else:
            parent_task = None

        self.cleanup_files()
        return parent_task

    def save_report(self, root_task):
        """
        generate a graphical summary of the optimization.

        @param root_task: (CalculationTask) the id.model attribute is used to register the generated files.

        @return: None
        """
        super(GridSearchHandler, self).save_report(root_task)

        files = graphics.rfactor.render_results(self._project.output_file + ".dat", self._pop.positions)
        for f in files:
            self._project.files.add_file(f, root_task.id.model, "report")

File diff suppressed because it is too large Load Diff

139
pmsco/optimizers/swarm.py Normal file
View File

@ -0,0 +1,139 @@
"""
@package pmsco.optimizers.swarm
particle swarm optimization handler.
the module starts multiple MSC calculations and optimizes the model parameters
according to the particle swarm optimization algorithm.
Particle swarm optimization adapted from
D. A. Duncan et al., Surface Science 606, 278 (2012)
@author Matthias Muntwiler, matthias.muntwiler@psi.ch
@copyright (c) 2015-18 by Paul Scherrer Institut @n
Licensed under the Apache License, Version 2.0 (the "License"); @n
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import logging
import numpy as np
import pmsco.optimizers.population as population
from pmsco.helpers import BraceMessage as BMsg
logger = logging.getLogger(__name__)
class SwarmPopulation(population.Population):
    """
    particle swarm population.

    each particle has a position and a velocity for every model parameter.
    in each step, the velocity is recalculated as a randomly weighted sum of three terms:
    the previous velocity (momentum),
    the distance to the best position that the particle has seen itself (attract_local),
    and the distance to the best position seen by a random group of particles (attract_global).
    the new velocity is then added to the position.
    """

    ## @var friends
    # number of other particles that each particle consults for the global best fit.
    # default = 3.

    ## @var momentum
    # momentum of the particle.
    # default = 0.689343.

    ## @var attract_local
    # preference for returning to the local best fit
    # default = 1.92694.

    ## @var attract_global
    # preference for heading towards the global best fit.
    # default = 1.92694

    def __init__(self):
        """
        initialize the population object.

        sets the default swarm parameters and selects the 'default' constrain modes
        for positions and velocities.
        """
        super(SwarmPopulation, self).__init__()

        self.friends = 3
        self.momentum = 0.689343
        self.attract_local = 1.92694
        self.attract_global = 1.92694
        self.position_constrain_mode = 'default'
        self.velocity_constrain_mode = 'default'

    def advance_population(self):
        """
        advance the population by one step.

        unless the _hold_once flag is set, this method increments the generation number
        and calls advance_particle() for each particle of the population.
        the inherited advance_population() is called in either case.

        @return: None
        """
        # NOTE(review): the nesting of the loop under the hold check is reconstructed
        # from a listing without indentation - confirm against the repository.
        if not self._hold_once:
            self.generation += 1
            for index, __ in enumerate(self.pos):
                self.advance_particle(index)

        super(SwarmPopulation, self).advance_population()

    def advance_particle(self, index):
        """
        advance a particle by one step.

        for each model parameter, the velocity is updated from the momentum term
        and the two attraction terms, each scaled by an independent random number.
        the velocity and the resulting position are passed through
        constrain_velocity() and constrain_position(), respectively.
        finally, update_particle_info() is called for the particle.

        @param index: index of the particle in the population.

        @return: None
        """
        # note: the following two identifiers are views,
        # assignment will modify the original array
        pos = self.pos[index]
        vel = self.vel[index]
        # best fit that this individual has seen
        xl = self.best[index]
        # best fit that a group of others have seen
        xg = self.best_friend(index)

        for key in self.model_start:
            # update velocity
            dxl = xl[key] - pos[key]
            dxg = xg[key] - pos[key]
            # independent random weights for the three velocity contributions
            pv = np.random.random()
            pl = np.random.random()
            pg = np.random.random()
            vel[key] = (self.momentum * pv * vel[key] +
                        self.attract_local * pl * dxl +
                        self.attract_global * pg * dxg)
            # the constrain methods return position, velocity and both limits,
            # all four values are written back.
            pos[key], vel[key], self.model_min[key], self.model_max[key] = \
                self.constrain_velocity(pos[key], vel[key], self.model_min[key], self.model_max[key],
                                        self.velocity_constrain_mode)

            # update position
            pos[key] += vel[key]
            pos[key], vel[key], self.model_min[key], self.model_max[key] = \
                self.constrain_position(pos[key], vel[key], self.model_min[key], self.model_max[key],
                                        self.position_constrain_mode)

        self.update_particle_info(index)

    # noinspection PyUnusedLocal
    def best_friend(self, index):
        """
        select the best fit out of a random set of particles.

        a random sample of self.friends distinct entries is drawn from the self.best array.
        if the population holds fewer particles than self.friends, all of them are consulted
        (drawing more distinct entries than available would raise a ValueError).

        @param index: index of the asking particle.
            the argument is not used. the sample is drawn from the whole population.

        @return: the sampled entry of self.best that has the lowest '_rfac' value (the "best friend").
        """
        # sampling without replacement cannot return more entries than the population holds
        count = min(self.friends, len(self.best))
        friends = np.random.choice(self.best, count, replace=False)
        best_index = np.argmin(friends['_rfac'])
        return friends[best_index]
class ParticleSwarmHandler(population.PopulationHandler):
    """
    optimization handler for the particle swarm algorithm.

    this is a population handler that operates on a SwarmPopulation.
    """

    def __init__(self):
        """
        initialize the handler and create its swarm population object.
        """
        super(ParticleSwarmHandler, self).__init__()

        swarm = SwarmPopulation()
        self._pop = swarm

155
pmsco/optimizers/table.py Normal file
View File

@ -0,0 +1,155 @@
"""
@package pmsco.optimizers.table
table scan optimization handler
the table scan scans through an explicit table of model parameters.
it can be used to recalculate models from a previous optimization run on different scans,
or as an interface to external optimizers.
new elements can be added to the table while the calculation loop is in progress.
though the concepts _population_ and _optimization_ are not intrinsic to a table scan,
the classes defined here inherit from the generic population class and optimization handler.
this is done to share as much code as possible between the different optimizers.
the only difference is that the table optimizer does not generate models internally.
instead, it loads them (possibly repeatedly) from a file or asks the project code to provide the data.
@author Matthias Muntwiler, matthias.muntwiler@psi.ch
@copyright (c) 2015-18 by Paul Scherrer Institut @n
Licensed under the Apache License, Version 2.0 (the "License"); @n
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import logging
import numpy as np
import pmsco.optimizers.population as population
from pmsco.helpers import BraceMessage as BMsg
logger = logging.getLogger(__name__)
class TablePopulation(population.Population):
    """
    population generated from explicit values.

    this class maintains a population that is updated from a table of explicit values.
    the table can be static (defined at the start of the optimization process)
    or dynamic (new models appended during the optimization process).

    for each generation, the table is read and the next models are imported into the population.
    the class de-duplicates the table, i.e. a model with the same parameters as a previous one is not calculated again.
    it is, thus, perfectly fine to append new models to the table rather than to overwrite previous entries.

    the table can be built from the following data sources:

    @arg (numpy.ndarray): structured array that can be added to self.positions,
        having at least the columns defining the model parameters.
    @arg (sequence of dict, numpy.ndarray, numpy.void, named tuple):
        each element must be syntactically compatible with a dict
        that holds the model parameters.
    @arg (str): file name that contains a table in the same format as
        @ref pmsco.optimizers.population.Population.save_array produces.
    @arg (callable): a function that returns one of the above objects
        (or None to mark the end of the table).

    the data source is passed as an argument to the self.setup() method.
    structured arrays and sequences cannot be modified after they are passed to `setup`.
    this means that the complete table must be known at the start of the process.

    the most flexible way is to pass a function that generates a structured array in each call.
    this would even allow to plug in a non-standard optimization algorithm.
    the function is best defined in the custom project class.
    the population calls it every time before a new generation starts.
    to end the optimization process, the function simply returns None.

    the table can also be defined in an external file, e.g. as calculated by other programs or edited manually.
    the table file can either remain unchanged during the optimization process,
    or new models can be added while the optimization is in progress.
    in the latter case, note that there is no reliable synchronization of file access.

    first, writing to the file must take as little time as possible.
    the population class has a read timeout of ten seconds.

    second, because it is impossible to know whether the file has been read or not,
    new models should be _appended_ rather than _overwrite_ previous ones.
    the population class automatically skips models that have already been read.

    this class does not support seeding.
    a seed file is accepted, but it is not used.
    patching is allowed, but there is normally no advantage over modifying the table.

    the domain is used to define the model parameters and the parameter range.
    models violating the parameter domain are ignored.
    """

    ## @var table_source
    # data source of the model table
    #
    # this can be any object accepted by @ref pmsco.optimizers.population.Population.import_positions,
    # e.g. a file name, a numpy structured array, or a function returning a structured array.
    # see the class description for details.

    def __init__(self):
        """
        initialize the population object.

        the table source is undefined until setup() is called.
        the position constrain mode is set to 'error'.
        """
        super(TablePopulation, self).__init__()

        self.table_source = None
        self.position_constrain_mode = 'error'

    def setup(self, size, domain, **kwargs):
        """
        set up the population arrays, parameter domain and data source.

        @param size: requested number of particles.
            this does not need to correspond to the number of table entries.
            on each generation the population loads up to this number of new entries from the table source.

        @param domain: definition of initial and limiting model parameters
            expected by the cluster and parameters functions.

            @arg domain.start: not used.
            @arg domain.min: minimum values allowed.
            @arg domain.max: maximum values allowed.
            @arg domain.step: not used.

        the following arguments are keyword arguments.
        the method also accepts the inherited arguments for seeding. they do not have an effect, however.

        @param table_source: (required) data source of the model table.
            this can be any object accepted by @ref pmsco.optimizers.population.Population.import_positions,
            e.g. a file name, a numpy structured array, or a function returning a structured array.
            see the class description for details.

        @return: None

        @raise KeyError if the table_source argument is missing.
        """
        # all keyword arguments, including table_source, are forwarded to the inherited method
        super(TablePopulation, self).setup(size, domain, **kwargs)

        source = kwargs['table_source']
        self.table_source = source

    def advance_population(self):
        """
        advance the population by one step.

        this method re-imports the table from the table source,
        copies the imported table to the current population
        and then calls the inherited method.

        @return: None
        """
        source = self.table_source
        self.import_positions(source)
        self.advance_from_import()

        super(TablePopulation, self).advance_population()
class TableModelHandler(population.PopulationHandler):
    """
    optimization handler for the table scan.

    this is a population handler that operates on a TablePopulation.
    """

    def __init__(self):
        """
        initialize the handler and create its table population object.
        """
        super(TableModelHandler, self).__init__()

        table = TablePopulation()
        self._pop = table