update public distribution

based on internal repository c9a2ac8 2019-01-03 16:04:57 +0100 tagged rev-master-2.0.0
2019-01-31 15:45:02 +01:00
parent bbd16d0f94
commit acea809e4e
92 changed files with 165828 additions and 143181 deletions
--- a/pmsco/optimizers/init.py
+++ b/pmsco/optimizers/init.py
--- a/pmsco/optimizers/genetic.py
+++ b/pmsco/optimizers/genetic.py
@ -0,0 +1,308 @@
+"""
+@package pmsco.optimizers.genetic
+genetic optimization algorithm.
+
+this module implements a genetic algorithm for structural optimization.
+
+the genetic algorithm is adapted from
+D. A. Duncan et al., Surface Science 606, 278 (2012)
+
+the genetic algorithm evolves a population of individuals
+by a combination of inheritance, crossover and mutation
+and R-factor based selection.
+
+@author Matthias Muntwiler, matthias.muntwiler@psi.ch
+
+@copyright (c) 2018 by Paul Scherrer Institut @n
+Licensed under the Apache License, Version 2.0 (the "License"); @n
+  you may not use this file except in compliance with the License.
+  You may obtain a copy of the License at
+  http://www.apache.org/licenses/LICENSE-2.0
+"""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+import logging
+import numpy as np
+import random
+import pmsco.optimizers.population as population
+from pmsco.helpers import BraceMessage as BMsg
+
+logger = logging.getLogger(__name__)
+
+
+class GeneticPopulation(population.Population):
+    """
+    population implementing a genetic optimization algorithm.
+
+    the genetic algorithm implements the following principles:
+
+    1. inheritance: two children of a new generation are generated from the genes (i.e. model parameters)
+       of two parents of the old generation.
+    2. elitism: individuals with similar r-factors are more likely to mate.
+    3. crossover: the genes of the parents are randomly distributed to their children.
+    4. mutation: a gene may mutate at random.
+    5. selection: the globally best individual is added to a parent population (and replaces the worst).
+
+    the main tuning parameter of the algorithm is the mutation_step which is copied from the domain.step.
+    it defines the width of a gaussian distribution of change under a weak mutation.
+    it should be large enough so that the whole parameter space can be probed,
+    but small enough that a frequent mutation does not throw the individual out of the convergence region.
+    typically, the step should be of the order of the parameter range divided by the population size.
+
+    other tunable parameters are the mating_factor, the weak_mutation_probability and the strong_mutation_probability.
+    the defaults should normally be fine.
+    """
+
+    ## @var weak_mutation_probability
+    #
+    # probability (between 0 and 1) that a parameter changes in the mutate_weak() method.
+    #
+    # the default is 1.0, i.e., each parameter mutates in each generation.
+    #
+    # 1.0 has shown better coverage of the continuous parameter space and faster finding of the optimum.
+
+    ## @var strong_mutation_probability
+    #
+    # probability (between 0 and 1) that a parameter changes in the mutate_strong() method.
+    #
+    # the default is 0.01, i.e., on average, every hundredth probed parameter is affected by a strong mutation.
+    # if the model contains 10 parameters, for example,
+    # every tenth particle would see a mutation of at least one of its parameters.
+    #
+    # too high value may disturb convergence,
+    # too low value may trap the algorithm in a local optimum.
+
+    ## @var mating_factor
+    #
+    # inverse width of the mating preference distribution.
+    #
+    # the greater this value, the more similar partners are mated by the mate_parents() method.
+    #
+    # the default value 4.0 results in a probability of about 0.0025
+    # that the best particle mates the worst.
+
+    ## @var position_constrain_mode
+    #
+    # the position constrain mode selects what to do if a particle violates the parameter limits.
+    #
+    # the default is "random" which resets the parameter to a random value.
+
+    ## @var mutation_step
+    #
+    # standard deviations of the gaussian distribution function used in the mutate_weak() method.
+    # the variable is a dictionary with the same keys as model_step (the parameter domain).
+    #
+    # it is initialized from the domain.step
+    # or set to a default value based on the parameter range and population size.
+
+    def __init__(self):
+        """
+        create an empty population with the default tuning parameters.
+
+        the mutation steps are empty at this point. they are filled in by setup().
+        """
+        super(GeneticPopulation, self).__init__()
+
+        self.mutation_step = {}
+        self.position_constrain_mode = 'random'
+        self.mating_factor = 4.0
+        self.strong_mutation_probability = 0.01
+        self.weak_mutation_probability = 1.0
+
+    def setup(self, size, domain, **kwargs):
+        """
+        @copydoc Population.setup()
+
+        after the inherited setup, self.mutation_step is filled for every parameter in self.model_step:
+        a non-zero step is taken over as is,
+        a zero step is replaced by the parameter range divided by the population size.
+        """
+        super(GeneticPopulation, self).setup(size, domain, **kwargs)
+
+        for key in self.model_step:
+            step = self.model_step[key]
+            if step != 0:
+                self.mutation_step[key] = step
+            else:
+                self.mutation_step[key] = (self.model_max[key] - self.model_min[key]) / size
+
+    def randomize(self, pos=True, vel=True):
+        """
+        initialize a "random" population.
+
+        the distribution is not completely random:
+        for each parameter, a linear ramp of values is generated and then permuted randomly.
+        the position ramp spans the parameter domain from model_min to model_max,
+        the velocity ramp spans plus/minus one eighth of the parameter range.
+
+        the method does not update the particle info fields.
+
+        @param pos: randomize positions. if False, the positions are not changed.
+        @param vel: randomize velocities. if False, the velocities are not changed.
+        """
+        if pos:
+            for key in self.model_start:
+                low = self.model_min[key]
+                high = self.model_max[key]
+                ramp = np.linspace(low, high, self.pos.shape[0])
+                self.pos[key] = np.random.permutation(ramp)
+        if vel:
+            for key in self.model_start:
+                span = (self.model_max[key] - self.model_min[key]) / 8
+                ramp = np.linspace(-span, span, self.vel.shape[0])
+                self.vel[key] = np.random.permutation(ramp)
+
+    def advance_population(self):
+        """
+        advance the population by one generation.
+
+        the population is advanced in several steps:
+        1. replace the worst individual by the best found so far.
+        2. mate the parents in pairs of two.
+        3. produce children by crossover from the parents.
+        4. apply weak mutations.
+        5. apply strong mutations.
+        6. apply another weak mutation to children that duplicate a previously calculated model.
+        7. pass each parameter through constrain_position() and store the child at its particle index.
+
+        if self._hold_once is set, none of these steps is carried out and the generation number is not increased.
+        the inherited advance_population() is called in either case.
+
+        individuals that mate_parents() does not return as part of a pair keep their current position.
+
+        @return: None
+        """
+        if not self._hold_once:
+            self.generation += 1
+
+            # work on sorted copies so that self.pos and self.best keep their order
+            pop = self.pos.copy()
+            pop.sort(order='_rfac')
+            elite = self.best.copy()
+            elite.sort(order='_rfac')
+            # selection: if the model with the lowest R-factor in self.best is not part of the current positions,
+            # it takes the place (and the particle index) of the last, i.e. worst, individual.
+            if elite[0]['_model'] not in pop['_model']:
+                elite[0]['_particle'] = pop[-1]['_particle']
+                pop[-1] = elite[0]
+                pop.sort(order='_rfac')
+
+            parents = self.mate_parents(pop)
+
+            children = []
+            for x, y in parents:
+                a, b = self.crossover(x, y)
+                children.append(a)
+                children.append(b)
+
+            for child in children:
+                # the child is written back to the slot of the particle it was copied from
+                index = child['_particle']
+                self.mutate_weak(child, self.weak_mutation_probability)
+                self.mutate_strong(child, self.strong_mutation_probability)
+                self.mutate_duplicate(child)
+                for key in self.model_start:
+                    # the displacement from the current position is handed to constrain_position() as velocity.
+                    # the four return values are unpacked into the position, the local vel and the stored limits.
+                    # NOTE(review): presumably the limits come back unchanged - confirm in Population.constrain_position.
+                    vel = child[key] - self.pos[index][key]
+                    child[key], vel, self.model_min[key], self.model_max[key] = \
+                        self.constrain_position(child[key], vel, self.model_min[key], self.model_max[key],
+                                                self.position_constrain_mode)
+
+                self.pos[index] = child
+                self.update_particle_info(index)
+
+        super(GeneticPopulation, self).advance_population()
+
+    def mate_parents(self, positions):
+        """
+        group the population in pairs of two.
+
+        to mate two individuals, the first individual of the (remaining) population selects one of the following
+        with an exponential preference of earlier ones.
+        the process is repeated until fewer than two individuals remain.
+        if the population size is odd, the last remaining individual is not mated.
+
+        the offset of the selected partner is drawn from an exponential distribution
+        with a mean of (number of candidates) / mating_factor and clipped to the last candidate.
+
+        @param positions: original population (numpy structured array)
+            the population should be ordered with best model first.
+        @return: sequence of pairs (tuples) of structured arrays holding one model each.
+        """
+        seq = list(positions)
+        parents = []
+        while len(seq) >= 2:
+            p1 = seq.pop(0)
+            ln = len(seq)
+            # random.expovariate(lambd) has a mean of 1 / lambd,
+            # so the rate mating_factor / ln already scales the distribution to the number of candidates.
+            # multiplying the sample by ln once more would stretch the mean to ln**2 / mating_factor
+            # and make the first individual select the last candidate most of the time.
+            i = min(int(random.expovariate(self.mating_factor / ln)), ln - 1)
+            p2 = seq.pop(i)
+            parents.append((p1, p2))
+        return parents
+
+    def crossover(self, parent1, parent2):
+        """
+        create two children by crossing two parents.
+
+        the children start as copies of their respective parent.
+        for each model parameter, a random number decides
+        whether the children keep the value of their own parent or exchange the values.
+
+        @param parent1: numpy structured array holding the model of the first parent.
+        @param parent2: numpy structured array holding the model of the second parent.
+        @return: tuple of the two crossed children.
+            these are two new ndarray instances that are independent of their parents.
+        """
+        child1, child2 = parent1.copy(), parent2.copy()
+        for key in self.model_start:
+            exchange = random.random() >= 0.5
+            if not exchange:
+                continue
+            child1[key] = parent2[key]
+            child2[key] = parent1[key]
+        return child1, child2
+
+    def mutate_weak(self, model, probability):
+        """
+        apply a weak mutation to a model.
+
+        each parameter is shifted at the given probability.
+        the shift is drawn from a gaussian distribution centered at zero
+        with the standard deviation given by mutation_step of the parameter.
+        the result is not checked against the parameter limits here.
+
+        @param[in,out] model: structured numpy.ndarray holding the model parameters.
+            model is modified in place.
+
+        @param probability: probability between 0 and 1 at which to change a parameter.
+            0 = no change, 1 = force change.
+
+        @return: model (same instance as the @c model input argument).
+        """
+        for key in self.model_start:
+            if not random.random() < probability:
+                continue
+            shift = random.gauss(0, self.mutation_step[key])
+            model[key] = model[key] + shift
+        return model
+
+    def mutate_strong(self, model, probability):
+        """
+        apply a strong mutation to a model.
+
+        each parameter is replaced at the given probability
+        by a uniformly distributed random value between model_min and model_max of the parameter.
+
+        @param[in,out] model: structured numpy.ndarray holding the model parameters.
+            model is modified in place.
+
+        @param probability: probability between 0 and 1 at which to change a parameter.
+            0 = no change, 1 = force change.
+
+        @return: model (same instance as the @c model input argument).
+        """
+        for key in self.model_start:
+            if not random.random() < probability:
+                continue
+            low = self.model_min[key]
+            high = self.model_max[key]
+            model[key] = (high - low) * random.random() + low
+        return model
+
+    def mutate_duplicate(self, model):
+        """
+        mutate a model if it is identical to a previously calculated one.
+
+        if the model was calculated before, the mutate_weak mutation is applied with probability 1.
+
+        only the ValueError of the find_model() lookup is treated as "not calculated before".
+        errors raised by the mutation itself are passed on to the caller.
+
+        @param[in,out] model: structured numpy.ndarray holding the model parameters.
+            model is modified in place.
+
+        @return: model (same instance as the @c model input argument).
+        """
+        try:
+            self.find_model(model)
+        except ValueError:
+            # find_model() is expected to raise ValueError if the model has not been calculated yet:
+            # nothing to do in that case.
+            pass
+        else:
+            self.mutate_weak(model, 1.0)
+        return model
+
+
+class GeneticOptimizationHandler(population.PopulationHandler):
+    """
+    model handler which implements a genetic algorithm.
+
+    the only difference to the inherited population.PopulationHandler visible here
+    is that the population object is a GeneticPopulation.
+    """
+
+    def __init__(self):
+        super(GeneticOptimizationHandler, self).__init__()
+        # use the genetic population in place of whatever the base class may have assigned to _pop
+        self._pop = GeneticPopulation()
--- a/pmsco/optimizers/gradient.py
+++ b/pmsco/optimizers/gradient.py
@ -0,0 +1,280 @@
+"""
+gradient optimization module for MSC calculations
+
+the module starts multiple MSC calculations and optimizes the model parameters
+with a gradient search.
+
+the optimization task is distributed over multiple processes using MPI.
+the optimization must be started with N+1 processes in the MPI environment,
+where N equals the number of fit parameters.
+
+IMPLEMENTATION IN PROGRESS - DEBUGGING
+
+Requires: scipy, numpy
+
+Author: Matthias Muntwiler
+
+Copyright (c) 2015 by Paul Scherrer Institut
+
+Licensed under the Apache License, Version 2.0 (the "License");
+  you may not use this file except in compliance with the License.
+  You may obtain a copy of the License at
+  http://www.apache.org/licenses/LICENSE-2.0
+"""
+
+import numpy as np
+import scipy.optimize as so
+import data as md
+from mpi4py import MPI
+
+# messages sent from master to slaves
+# (the tag values must be unique within one direction because the receiver dispatches on the tag)
+
+# master sends new assignment
+# the message is a dictionary of model parameters
+TAG_NEW_TASK = 1
+# master calls end of calculation
+# the message is empty
+TAG_FINISH = 2
+# master sends current population
+# currently not used
+# (the value must differ from TAG_FINISH, otherwise a slave would take a population message as a request to finish)
+TAG_POPULATION = 3
+
+# messages sent from slaves to master
+# slave reports new result
+# the message is a dictionary of model parameters and results
+TAG_NEW_RESULT = 1
+# slave confirms end of calculation
+# currently not used
+TAG_FINISHED = 2
+
+class MscProcess(object):
+    """
+    Code shared by MscMaster and MscSlave
+    """
+    def __init__(self, comm):
+        """
+        comm: MPI communicator used to exchange messages between the master and the slaves.
+        """
+        self.comm = comm
+        # the state attributes are defined here so that they exist before setup() is called.
+        # setup() resets them to the same values.
+        self.project = None
+        self.running = False
+        self.finishing = False
+        self.iteration = 0
+
+    def setup(self, project):
+        """
+        Stores the project and resets the run state.
+
+        project: project object which provides the create_cluster, create_params and run_calc methods
+            as well as the output_file, scan_file and scan_modf attributes used by calc().
+        """
+        self.project = project
+        self.running = False
+        self.finishing = False
+        self.iteration = 0
+
+    def run(self):
+        """
+        Main loop of the process. To be implemented by the sub-class.
+        """
+        pass
+
+    def cleanup(self):
+        """
+        Releases resources after run(). To be extended by the sub-class.
+        """
+        pass
+
+    def calc(self, pars):
+        """
+        Executes a single MSC calculation.
+
+        pars: A dictionary of parameters expected by the cluster and parameters functions.
+            The dictionary is modified in place.
+
+        returns: pars (the same instance) with three additional values:
+            rank:  rank of the calculation process
+            iter:  iteration index of the calculation process
+            rfac:  resulting R-factor
+
+            all other calculation results are discarded.
+        """
+        # create parameter and cluster structures
+        clu = self.project.create_cluster(pars)
+        par = self.project.create_params(pars)
+
+        # generate file names
+        base_filename = "%s_%u_%u" % (self.project.output_file, self.comm.rank, self.iteration)
+
+        # call the msc program
+        result_etpi = self.project.run_calc(par, clu, self.project.scan_file, base_filename, delete_files=True)
+
+        # calculate modulation function and R-factor
+        result_etpi = md.calc_modfunc_lowess(result_etpi)
+        result_r = md.rfactor(self.project.scan_modf, result_etpi)
+
+        pars['rank'] = self.comm.rank
+        pars['iter'] = self.iteration
+        pars['rfac'] = result_r
+
+        return pars
+
+class MscMaster(MscProcess):
+    def __init__(self, comm):
+        """
+        comm: MPI communicator. All ranks except one (the master itself) are counted as slaves.
+        """
+        super(MscMaster, self).__init__(comm)
+        # no slave is considered running before setup() is called
+        self.running_slaves = 0
+        self.slaves = self.comm.Get_size() - 1
+
+    def setup(self, project):
+        """
+        creates the parameter domain and opens the output file.
+
+        the output file is named after project.output_file with the extension ".dat".
+        the first line is a header which lists the column names:
+        the parameter names of the domain followed by 'rfac'.
+        the file remains open until cleanup() is called.
+        """
+        super(MscMaster, self).setup(project)
+        self.dom = project.create_domain()
+        self.running_slaves = self.slaves
+
+        # dict.keys() returns a view without append() under Python 3: copy the names into a list
+        self._outfile_keys = list(self.dom.start.keys())
+        self._outfile_keys.append('rfac')
+
+        self._outfile = open(self.project.output_file + ".dat", "w")
+        self._outfile.write("#")
+        for name in self._outfile_keys:
+            self._outfile.write(" " + name)
+        self._outfile.write("\n")
+
+    def run(self):
+        """
+        runs the minimization and writes the result to the output file.
+
+        parameters whose upper limit is greater than the lower limit are fitted
+        with the bounded L-BFGS-B method of scipy.optimize.minimize, starting at the domain's start values.
+        all other parameters are held constant at the value given by the upper limit.
+        """
+        # pack initial guess, bounds, constant parameters
+        const_params = self.dom.max.copy()
+        params_index = {}
+        start_values = []
+        bounds = []
+        for key in self.dom.start:
+            low, high = self.dom.min[key], self.dom.max[key]
+            if high > low:
+                # the index of a fit parameter is its position in the fit vector
+                params_index[key] = len(start_values)
+                start_values.append(self.dom.start[key])
+                bounds.append((low, high))
+        fit_params = np.array(start_values, dtype=np.float64)
+
+        fit_result = so.minimize(self._minfunc, fit_params,
+                                 args=(params_index, const_params),
+                                 method='L-BFGS-B', jac=True, bounds=bounds)
+
+        # unpack the optimum into a complete parameter dictionary
+        msc_result = const_params.copy()
+        for key in params_index:
+            msc_result[key] = fit_result.x[params_index[key]]
+        msc_result['rfac'] = fit_result.fun
+
+        write = self._outfile.write
+        write("# result of gradient optimization\n")
+        write("# success = {0}, iterations = {1}, calculations = {2}\n".format(
+            fit_result.success, fit_result.nit, fit_result.nfev))
+        write("# message: {0}\n".format(fit_result.message))
+        for name in self._outfile_keys:
+            write(" " + str(msc_result[name]))
+        write("\n")
+
+    def _minfunc(self, fit_params, params_index, const_params):
+        """
+        function to be minimized
+
+        fit_params (numpy.ndarray): current fit position
+        params_index (dict): dictionary of fit parameters
+            and their index in fit_params.
+            key=MSC parameter name, value=index to fit_params.
+        const_params (dict): dictionary of MSC parameters
+            holding (at least) the constant parameter values.
+            a copy of this instance, updated with the current fit position,
+            is passed to MSC.
+
+        returns:
+        (float) R-factor at the fit position
+        (numpy.ndarray) approximate gradient, ordered like fit_params
+        """
+        index_items = list(params_index.items())
+
+        # unpack parameters
+        msc_params = const_params.copy()
+        msc_params.update((name, fit_params[pos]) for name, pos in index_items)
+
+        # run MSC calculations
+        rfac, jac_dict = self.run_msc_calcs(msc_params, params_index)
+
+        # pack jacobian
+        jac_arr = np.zeros_like(fit_params)
+        for name, pos in index_items:
+            jac_arr[pos] = jac_dict[name]
+
+        return rfac, jac_arr
+
+    def run_msc_calcs(self, params, params_index):
+        """
+        calculates the R-factor and its finite-difference gradient at one position.
+
+        one slave process per fit parameter calculates the R-factor at a position
+        that is displaced by self.dom.step along that parameter,
+        while the master calculates the R-factor at the actual position.
+        every result is appended to the output file.
+
+        params: dictionary of actual parameters.
+            the dictionary is passed to calc() which adds its result fields in place.
+        params_index: dictionary of fit parameter indices.
+            only the keys are used here
+            to decide for which parameters the derivative is calculated.
+
+        returns:
+        (float) R-factor at the params location
+        (dict) approximate gradient at the params location.
+            this is a copy of params in which the values of the fit parameters
+            are replaced by the partial derivatives of the R-factor.
+
+        raises RuntimeError if there are fewer slave processes than fit parameters.
+        """
+        fit_keys = list(params_index)
+        # each fit parameter needs its own slave. fail with a clear message
+        # rather than sending a task to a rank that does not exist.
+        if len(fit_keys) > self.slaves:
+            raise RuntimeError("gradient optimization of {0} parameters requires {0} slave processes, "
+                               "but only {1} are available".format(len(fit_keys), self.slaves))
+
+        # distribute tasks for gradient
+        for slave_rank, key in enumerate(fit_keys, start=1):
+            params2 = params.copy()
+            params2[key] += self.dom.step[key]
+            # the slave returns the key so that the result can be assigned to the right parameter
+            params2['key'] = key
+            self.comm.send(params2, dest=slave_rank, tag=TAG_NEW_TASK)
+
+        # run calculation for actual position
+        result0 = self.calc(params)
+        for name in self._outfile_keys:
+            self._outfile.write(" " + str(result0[name]))
+        self._outfile.write("\n")
+
+        # gather results: one message per distributed task, in the order of arrival
+        s = MPI.Status()
+        jacobian = params.copy()
+        for _ in fit_keys:
+            result1 = self.comm.recv(source=MPI.ANY_SOURCE, tag=MPI.ANY_TAG, status=s)
+            if s.tag == TAG_NEW_RESULT:
+                key = result1['key']
+                # forward difference quotient
+                jacobian[key] = (result1['rfac'] - result0['rfac']) / (result1[key] - result0[key])
+                for name in self._outfile_keys:
+                    self._outfile.write(" " + str(result1[name]))
+                self._outfile.write("\n")
+
+        self._outfile.flush()
+        return result0['rfac'], jacobian
+
+    def cleanup(self):
+        """
+        closes the output file and sends the finish message to the slave processes.
+
+        the message is sent to ranks 1 through running_slaves.
+        """
+        self._outfile.close()
+        for offset in range(self.running_slaves):
+            self.comm.send(None, dest=offset + 1, tag=TAG_FINISH)
+        super(MscMaster, self).cleanup()
+
+class MscSlave(MscProcess):
+    """
+    Calculation process which works on the tasks assigned by the master (rank 0).
+    """
+
+    def run(self):
+        """
+        Message loop of the slave.
+
+        Blocks on messages from rank 0.
+        A new-task message is handed to accept_task(), a finish message ends the loop.
+        Messages with any other tag are ignored.
+        """
+        status = MPI.Status()
+        self.running = True
+        while self.running:
+            message = self.comm.recv(source=0, tag=MPI.ANY_TAG, status=status)
+            tag = status.tag
+            if tag == TAG_FINISH:
+                self.running = False
+            elif tag == TAG_NEW_TASK:
+                self.accept_task(message)
+
+    def accept_task(self, pars):
+        """
+        Runs the calculation for the received parameters,
+        sends the result to the master and advances the iteration counter.
+        """
+        self.comm.send(self.calc(pars), dest=0, tag=TAG_NEW_RESULT)
+        self.iteration += 1
+
+def optimize(project):
+    """
+    main entry point for optimization
+
+    rank 0: starts the calculation, distributes tasks
+    ranks 1...N-1: work on assignments from rank 0
+
+    cleanup() is called even if run() raises an exception.
+    on rank 0, this closes the output file and releases the waiting slave processes.
+    the exception is passed on to the caller.
+    """
+    mpi_comm = MPI.COMM_WORLD
+    mpi_rank = mpi_comm.Get_rank()
+
+    process_class = MscMaster if mpi_rank == 0 else MscSlave
+    process = process_class(mpi_comm)
+    # setup stays outside of the try block: cleanup() relies on the resources which setup() creates
+    process.setup(project)
+    try:
+        process.run()
+    finally:
+        process.cleanup()
--- a/pmsco/optimizers/grid.py
+++ b/pmsco/optimizers/grid.py
@ -0,0 +1,421 @@
+"""
+@package pmsco.optimizers.grid
+grid search optimization handler.
+
+the module starts multiple MSC calculations and varies parameters on a fixed coordinate grid.
+
+@author Matthias Muntwiler, matthias.muntwiler@psi.ch
+
+@copyright (c) 2015 by Paul Scherrer Institut @n
+Licensed under the Apache License, Version 2.0 (the "License"); @n
+  you may not use this file except in compliance with the License.
+  You may obtain a copy of the License at
+  http://www.apache.org/licenses/LICENSE-2.0
+"""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import datetime
+import math
+import numpy as np
+import logging
+
+from pmsco.compat import open
+import pmsco.handlers as handlers
+import pmsco.graphics as graphics
+from pmsco.helpers import BraceMessage as BMsg
+
+logger = logging.getLogger(__name__)
+
+
+class GridPopulation(object):
+    """
+    grid population.
+    """
+
+    ## @var model_start
+    # (dict) initial model parameters.
+    # read-only. call setup() to change this attribute.
+
+    ## @var model_min
+    # (dict) low limits of the model parameters.
+    # read-only. call setup() to change this attribute.
+
+    ## @var model_max
+    # (dict) high limits of the model parameters.
+    # if min == max, the parameter is kept constant.
+    # read-only. call setup() to change this attribute.
+
+    ## @var model_max
+    # (dict) high limits of the model parameters.
+    # read-only. call setup() to change this attribute.
+
+    ## @var model_step
+    # (dict) step size (distance between two grid points) of the model parameters.
+    # read-only. call setup() to change this attribute.
+
+    ## @var model_count
+    # number of models (grid points).
+    # initial value = 0.
+
+    ## @var positions
+    # (numpy.ndarray) flat list of grid coordinates and results.
+    #
+    # the column names include the names of the model parameters, taken from domain.start,
+    # and the special names @c '_model', @c '_rfac'.
+    # the special fields have the following meanings:
+    #
+    # * @c '_model': model number.
+    #   the model number identifies the grid point.
+    #   the field is used to associate the result of a calculation with the coordinate vector.
+    #   the model handlers use it to derive their model ID.
+    #
+    # * @c '_rfac': calculated R-factor for this position.
+    #   it is set by the add_result() method.
+    #
+    # @note if you read a single element, e.g. pos[0], from the array, you will get a numpy.void object.
+    # this object is a <em>view</em> of the original array item.
+
+    def __init__(self):
+        """
+        create an empty grid population.
+
+        all attributes get neutral values (empty dictionaries and lists, zero models, no positions array).
+        call setup() to define the grid.
+        """
+        # parameter domain
+        self.model_start, self.model_min = {}, {}
+        self.model_max, self.model_step = {}, {}
+
+        # grid points
+        self.model_count = 0
+        self.positions = None
+
+        # names of scanned and constant parameters
+        self.search_keys, self.fixed_keys = [], []
+
+    @staticmethod
+    def get_model_dtype(model_params):
+        """
+        build the numpy data type of the positions array.
+
+        every model parameter becomes a 4-byte float field.
+        the control fields '_model' (4-byte integer) and '_rfac' (4-byte float) are added.
+
+        @param model_params: dictionary of model parameters or list of parameter names.
+
+        @return: dtype for use with numpy array constructors.
+            this is a list of (name, type) tuples, sorted by name regardless of case.
+        """
+        fields = [(name, 'f4') for name in model_params]
+        fields += [('_model', 'i4'), ('_rfac', 'f4')]
+        return sorted(fields, key=lambda field: field[0].lower())
+
+    def setup(self, domain):
+        """
+        set up the population and result arrays.
+
+        the grid is the cartesian product of the coordinates of all scanned parameters.
+        the grid points are stored in self.positions in random order.
+        if no parameter is scanned, the grid consists of one point at the start values.
+
+        @param domain: definition of initial and limiting model parameters
+            expected by the cluster and parameters functions.
+            the attributes have the following meanings:
+            @arg start: values of the fixed parameters.
+            @arg min:   minimum values allowed.
+            @arg max:   maximum values allowed.
+                        if abs(max - min) < step/2 , the parameter is kept constant.
+            @arg step:  step size (distance between two grid points).
+                        if step <= 0, the parameter is kept constant.
+
+        """
+        self.model_start = domain.start
+        self.model_min = domain.min
+        self.model_max = domain.max
+        self.model_step = domain.step
+
+        self.model_count = 1
+        self.search_keys = []
+        self.fixed_keys = []
+        scales = []
+
+        for p in domain.step.keys():
+            if domain.step[p] > 0:
+                # np.linspace requires an integer number of points
+                n = int(np.round((domain.max[p] - domain.min[p]) / domain.step[p])) + 1
+            else:
+                n = 1
+            if n > 1:
+                self.search_keys.append(p)
+                scales.append(np.linspace(domain.min[p], domain.max[p], n))
+            else:
+                self.fixed_keys.append(p)
+
+        # scales is a list of 1D arrays that hold the coordinates of the individual dimensions
+        # positions_nd is a list of N-D arrays that hold the coordinates in all multiple dimensions
+        # positions_flat is a list of 1D arrays that hold the coordinates in flat sequence
+        if len(scales) > 1:
+            positions_nd = np.meshgrid(*scales, indexing='ij')
+            positions_flat = [arr.flatten() for arr in positions_nd]
+        else:
+            positions_flat = scales
+        if positions_flat:
+            self.model_count = positions_flat[0].shape[0]
+        else:
+            # all parameters are constant: the grid consists of a single point
+            self.model_count = 1
+
+        # shuffle the calculation order so that we may see the more interesting parts earlier
+        shuffle_index = np.arange(self.model_count)
+        np.random.shuffle(shuffle_index)
+        positions_reordered = [pos[shuffle_index] for pos in positions_flat]
+
+        dt = self.get_model_dtype(self.model_min)
+
+        # positions
+        self.positions = np.zeros(self.model_count, dtype=dt)
+
+        for idx, key in enumerate(self.search_keys):
+            self.positions[key] = positions_reordered[idx]
+        for key in self.fixed_keys:
+            self.positions[key] = self.model_start[key]
+
+        self.positions['_model'] = np.arange(self.model_count)
+        # initial R-factor of models which have not been calculated yet
+        self.positions['_rfac'] = 2.1
+
+    def add_result(self, particle, rfac):
+        """
+        store the R-factor of a calculated model in the positions array.
+
+        @param particle: dictionary of model parameters and particle values.
+            only the '_model' field is used here.
+            it selects the row of the positions array that receives the result.
+
+        @param rfac: calculated R-factor.
+            the R-factor is written to the '_rfac' field.
+
+        @return None
+        """
+        row = particle['_model']
+        # field access returns a view, the assignment writes through to self.positions
+        rfac_column = self.positions['_rfac']
+        rfac_column[row] = rfac
+
+    def save_array(self, filename, array):
+        """
+        save a population array to a text file.
+
+        the file starts with a header line that lists the field names of self.positions.
+        the values are written in '%g' format.
+
+        @param filename: name of the file to write (passed on to numpy.savetxt).
+
+        @param array: population array to save.
+            the columns should correspond to the fields of self.positions.
+        """
+        column_names = self.positions.dtype.names
+        np.savetxt(filename, array, fmt='%g', header=" ".join(column_names))
+
+    def load_array(self, filename, array):
+        """
+        load a population array from a text file.
+
+        the array to load must be compatible with the current population
+        (same number of rows, same columns).
+        the first row must contain column names.
+        the ordering of columns may be different.
+        the returned array is ordered according to the array argument.
+
+        @param filename: name of the file to read (passed on to numpy.genfromtxt).
+
+        @param array: population array to load.
+            the columns of the file are copied into the fields of the same name.
+
+        @return array with loaded data.
+            this is the same instance as on input.
+
+        @raise AssertionError if the number of rows of the file and the array differ.
+        """
+        # genfromtxt returns a zero-dimensional array if the file contains just one row.
+        # atleast_1d makes the shape comparable to a population of one model.
+        data = np.atleast_1d(np.genfromtxt(filename, names=True))
+        assert data.shape == array.shape
+        for name in data.dtype.names:
+            array[name] = data[name]
+        return array
+
+    def save_population(self, base_filename):
+        """
+        save the positions array to a text file.
+
+        the file name is base_filename with the extension .pos.
+        """
+        pos_filename = base_filename + ".pos"
+        self.save_array(pos_filename, self.positions)
+
+    def load_population(self, base_filename):
+        """
+        load the positions array from a previously saved text file.
+        this can be used to continue an optimization job.
+
+        the file name is base_filename with the extension .pos.
+        the file must have the same format as produced by save_population
+        and the same number of rows as the current positions array.
+        """
+        pos_filename = base_filename + ".pos"
+        self.load_array(pos_filename, self.positions)
+
+    def save_results(self, filename):
+        """
+        save the complete list of calculation results.
+
+        the results are the rows of the positions array, including the '_rfac' field.
+        """
+        results = self.positions
+        self.save_array(filename, results)
+
+
+class GridSearchHandler(handlers.ModelHandler):
+    """
+    model handler which implements the grid search algorithm.
+
+    """
+
+    ## @var _pop (Population)
+    # holds the population object.
+
+    ## @var _outfile (file)
+    # output file for model parameters and R factor.
+    # the file is open during calculations.
+    # each calculation result adds one line.
+
+    ## @var _model_time (timedelta)
+    #  estimated CPU time to calculate one model.
+    #  this value is the maximum time measured of the completed calculations.
+    #  it is used to determine when the optimization should be finished so that the time limit is not exceeded.
+
+    ## @var _timeout (bool)
+    #  indicates when the handler has run out of time,
+    #  i.e. time is up before convergence has been reached.
+    #  if _timeout is True, create_tasks() will not create further tasks,
+    #  and add_result() will signal completion when the _pending_tasks queue becomes empty.
+
+    def __init__(self):
+        super(GridSearchHandler, self).__init__()
+        self._pop = None
+        self._outfile = None
+        self._model_time = datetime.timedelta()
+        self._timeout = False
+        self._invalid_limit = 10
+        self._next_model = 0
+
+    def setup(self, project, slots):
+        """
+        initialize the particle swarm and open an output file.
+
+        @param project:
+
+        @param slots: number of calculation processes available through MPI.
+            for efficiency reasons we set the population size twice the number of available slots.
+            the minimum number of slots is 1, the recommended value is 10 or greater.
+            the population size is set to at least 4.
+
+        @return:
+        """
+        super(GridSearchHandler, self).setup(project, slots)
+
+        self._pop = GridPopulation()
+        self._pop.setup(self._project.create_domain())
+        self._invalid_limit = max(slots, self._invalid_limit)
+
+        self._outfile = open(self._project.output_file + ".dat", "w")
+        self._outfile.write("# ")
+        self._outfile.write(" ".join(self._pop.positions.dtype.names))
+        self._outfile.write("\n")
+
+        return None
+
+    def cleanup(self):
+        self._outfile.close()
+        super(GridSearchHandler, self).cleanup()
+
+    def create_tasks(self, parent_task):
+        """
+        develop the particle population and create a calculation task per particle.
+
+        this method advances the population by one step, and generates one task per particle.
+        during the first call, the method first sets up a new population.
+
+        the process loop calls this method every time the length of the task queue drops
+        below  the number of calculation processes (slots).
+
+        @return list of generated tasks. empty list if all grid points have been calculated.
+        """
+
+        super(GridSearchHandler, self).create_tasks(parent_task)
+
+        # this is the top-level handler, we expect just one parent: root.
+        parent_id = parent_task.id
+        assert parent_id == (-1, -1, -1, -1, -1)
+        self._parent_tasks[parent_id] = parent_task
+
+        time_pending = self._model_time * len(self._pending_tasks)
+        time_avail = (self.datetime_limit - datetime.datetime.now()) * max(self._slots, 1)
+
+        out_tasks = []
+        time_pending += self._model_time
+        if time_pending > time_avail:
+            self._timeout = True
+
+        model = self._next_model
+        if not self._timeout and model < self._pop.model_count and self._invalid_count < self._invalid_limit:
+            new_task = parent_task.copy()
+            new_task.parent_id = parent_id
+            pos = self._pop.positions[model]
+            new_task.model = {k: pos[k] for k in pos.dtype.names}
+            new_task.change_id(model=model)
+
+            child_id = new_task.id
+            self._pending_tasks[child_id] = new_task
+            out_tasks.append(new_task)
+            self._next_model += 1
+
+        return out_tasks
+
+    def add_result(self, task):
+        """
+        calculate the R factor of the result and store it in the positions array.
+
+        * append the result to the result output file.
+        * update the execution time statistics.
+        * remove temporary files if requested.
+        * check whether the grid search is complete.
+
+        @return parent task (CalculationTask) if the search is complete, @c None otherwise.
+        """
+        super(GridSearchHandler, self).add_result(task)
+
+        self._complete_tasks[task.id] = task
+        del self._pending_tasks[task.id]
+        parent_task = self._parent_tasks[task.parent_id]
+
+        if task.result_valid:
+            assert not math.isnan(task.rfac)
+            task.model['_rfac'] = task.rfac
+            self._pop.add_result(task.model, task.rfac)
+
+            if self._outfile:
+                s = (str(task.model[name]) for name in self._pop.positions.dtype.names)
+                self._outfile.write(" ".join(s))
+                self._outfile.write("\n")
+                self._outfile.flush()
+
+        self._project.files.update_model_rfac(task.id.model, task.rfac)
+        self._project.files.set_model_complete(task.id.model, True)
+
+        if task.result_valid:
+            if task.time > self._model_time:
+                self._model_time = task.time
+        else:
+            self._invalid_count += 1
+
+        # grid search complete?
+        if len(self._pending_tasks) == 0:
+            del self._parent_tasks[parent_task.id]
+        else:
+            parent_task = None
+
+        self.cleanup_files()
+        return parent_task
+
+    def save_report(self, root_task):
+        """
+        generate a graphical summary of the optimization.
+
+        @param root_task: (CalculationTask) the id.model attribute is used to register the generated files.
+
+        @return: None
+        """
+        super(GridSearchHandler, self).save_report(root_task)
+
+        files = graphics.rfactor.render_results(self._project.output_file + ".dat", self._pop.positions)
+        for f in files:
+            self._project.files.add_file(f, root_task.id.model, "report")
--- a/pmsco/optimizers/population.py
+++ b/pmsco/optimizers/population.py
--- a/pmsco/optimizers/swarm.py
+++ b/pmsco/optimizers/swarm.py
@ -0,0 +1,139 @@
+"""
+@package pmsco.optimizers.swarm
+particle swarm optimization handler.
+
+the module starts multiple MSC calculations and optimizes the model parameters
+according to the particle swarm optimization algorithm.
+
+Particle swarm optimization adapted from
+D. A. Duncan et al., Surface Science 606, 278 (2012)
+
+@author Matthias Muntwiler, matthias.muntwiler@psi.ch
+
+@copyright (c) 2015-18 by Paul Scherrer Institut @n
+Licensed under the Apache License, Version 2.0 (the "License"); @n
+  you may not use this file except in compliance with the License.
+  You may obtain a copy of the License at
+  http://www.apache.org/licenses/LICENSE-2.0
+"""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+import logging
+import numpy as np
+import pmsco.optimizers.population as population
+from pmsco.helpers import BraceMessage as BMsg
+
+logger = logging.getLogger(__name__)
+
+
+class SwarmPopulation(population.Population):
+    """
+    particle swarm population.
+
+    in each generation, the velocity of each particle is updated from
+    its previous velocity, the distance to its own best position
+    and the distance to the best position of a random group of particles.
+    """
+
+    ## @var friends
+    # number of other particles that each particle consults for the global best fit.
+    # default = 3.
+
+    ## @var momentum
+    # momentum of the particle.
+    # default = 0.689343.
+
+    ## @var attract_local
+    # preference for returning to the local best fit
+    # default = 1.92694.
+
+    ## @var attract_global
+    # preference for heading towards the global best fit.
+    # default = 1.92694
+
+    def __init__(self):
+        """
+        initialize the population object.
+
+        """
+        super(SwarmPopulation, self).__init__()
+
+        self.friends = 3
+        self.momentum = 0.689343
+        self.attract_local = 1.92694
+        self.attract_global = 1.92694
+        self.position_constrain_mode = 'default'
+        self.velocity_constrain_mode = 'default'
+
+    def advance_population(self):
+        """
+        advance the population by one step.
+
+        unless the _hold_once flag is set,
+        this method increases the generation number and calls advance_particle() for each particle of the population.
+        in either case, the inherited advance_population() method is called afterwards.
+
+        @return: None
+        """
+        if not self._hold_once:
+            self.generation += 1
+            for index, __ in enumerate(self.pos):
+                self.advance_particle(index)
+
+        super(SwarmPopulation, self).advance_population()
+
+    def advance_particle(self, index):
+        """
+        advance a particle by one step.
+
+        for each model parameter, the new velocity is the sum of three terms,
+        each of which is weighted by an independent random number between 0 and 1:
+        the previous velocity times self.momentum,
+        the distance to the best position of this particle times self.attract_local, and
+        the distance to the best position of the "best friend" times self.attract_global.
+
+        velocity and position are passed through self.constrain_velocity and self.constrain_position.
+        the values returned by these methods also replace the respective entries of self.model_min and self.model_max.
+
+        @param index: index of the particle in the population.
+        """
+
+        # note: the following two identifiers are views,
+        # assignment will modify the original array
+        pos = self.pos[index]
+        vel = self.vel[index]
+        # best fit that this individual has seen
+        xl = self.best[index]
+        # best fit that a group of others have seen
+        xg = self.best_friend(index)
+
+        for key in self.model_start:
+            # update velocity
+            dxl = xl[key] - pos[key]
+            dxg = xg[key] - pos[key]
+            pv = np.random.random()
+            pl = np.random.random()
+            pg = np.random.random()
+            vel[key] = (self.momentum * pv * vel[key] +
+                self.attract_local * pl * dxl +
+                self.attract_global * pg * dxg)
+            pos[key], vel[key], self.model_min[key], self.model_max[key] = \
+                self.constrain_velocity(pos[key], vel[key], self.model_min[key], self.model_max[key],
+                                        self.velocity_constrain_mode)
+            # update position
+            pos[key] += vel[key]
+            pos[key], vel[key], self.model_min[key], self.model_max[key] = \
+                self.constrain_position(pos[key], vel[key], self.model_min[key], self.model_max[key],
+                                        self.position_constrain_mode)
+
+        self.update_particle_info(index)
+
+    # noinspection PyUnusedLocal
+    def best_friend(self, index):
+        """
+        select the best fit out of a random set of particles
+
+        the set is drawn without replacement from self.best.
+        its size is self.friends, or the size of the population if that is smaller.
+
+        @param index: index of the particle in the population. not used.
+
+        @return the element of the random set which has the lowest '_rfac' value (the "best friend").
+        """
+        # sampling without replacement fails if more elements are requested than available
+        count = min(self.friends, len(self.best))
+        friends = np.random.choice(self.best, count, replace=False)
+        best_index = np.argmin(friends['_rfac'])
+        return friends[best_index]
+
+
+class ParticleSwarmHandler(population.PopulationHandler):
+    """
+    population handler for the particle swarm optimization algorithm.
+
+    the class differs from the generic population handler only
+    in that it operates on a SwarmPopulation.
+    """
+
+    def __init__(self):
+        """
+        initialize the inherited handler and attach a new SwarmPopulation object.
+        """
+        super(ParticleSwarmHandler, self).__init__()
+        swarm = SwarmPopulation()
+        self._pop = swarm
--- a/pmsco/optimizers/table.py
+++ b/pmsco/optimizers/table.py
@ -0,0 +1,155 @@
+"""
+@package pmsco.optimizers.table
+table scan optimization handler
+
+the table scan scans through an explicit table of model parameters.
+it can be used to recalculate models from a previous optimization run on different scans,
+or as an interface to external optimizers.
+new elements can be added to the table while the calculation loop is in progress.
+
+though the concepts _population_ and _optimization_ are not intrinsic to a table scan,
+the classes defined here inherit from the generic population class and optimization handler.
+this is done to share as much code as possible between the different optimizers.
+the only difference is that the table optimizer does not generate models internally.
+instead, it loads them (possibly repeatedly) from a file or asks the project code to provide the data.
+
+@author Matthias Muntwiler, matthias.muntwiler@psi.ch
+
+@copyright (c) 2015-18 by Paul Scherrer Institut @n
+Licensed under the Apache License, Version 2.0 (the "License"); @n
+  you may not use this file except in compliance with the License.
+  You may obtain a copy of the License at
+  http://www.apache.org/licenses/LICENSE-2.0
+"""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+import logging
+import numpy as np
+import pmsco.optimizers.population as population
+from pmsco.helpers import BraceMessage as BMsg
+
+logger = logging.getLogger(__name__)
+
+
+class TablePopulation(population.Population):
+    """
+    population generated from explicit values.
+
+    this class maintains a population that is updated from a table of explicit values.
+    the table can be static (defined at the start of the optimization process)
+    or dynamic (new models appended during the optimization process).
+
+    for each generation, the table is read and the next models are imported into the population.
+    the class de-duplicates the table, i.e. models with equal parameters as a previous one are not calculated again.
+    it is, thus, perfectly fine that new models are appended to the table rather than overwrite previous entries.
+
+    the table can be built from the following data sources:
+
+    @arg (numpy.ndarray): structured array that can be added to self.positions,
+        having at least the columns defining the model parameters.
+    @arg (sequence of dict, numpy.ndarray, numpy.void, named tuple):
+        each element must be syntactically compatible with a dict
+        that holds the model parameters.
+    @arg (str): file name that contains a table in the same format as
+        @ref pmsco.optimizers.population.Population.save_array produces.
+    @arg (callable): a function that returns one of the above objects
+        (or None to mark the end of the table).
+
+    the data source is passed as an argument to the self.setup() method.
+    structured arrays and sequences cannot be modified after they are passed to `setup`.
+    this means that the complete table must be known at the start of the process.
+
+    the most flexible way is to pass a function that generates a structured array in each call.
+    this would even allow to include a non-standard optimization algorithm.
+    the function is best defined in the custom project class.
+    the population calls it every time before a new generation starts.
+    to end the optimization process, it simply returns None.
+
+    the table can also be defined in an external file, e.g. as calculated by other programs or edited manually.
+    the table file can either remain unchanged during the optimization process,
+    or new models can be added while the optimization is in progress.
+    in the latter case, note that there is no reliable synchronization of file access.
+
+    first, writing to the file must be as short as possible.
+    the population class has a read timeout of ten seconds.
+
+    second, because it is impossible to know whether the file has been read or not,
+    new models should be _appended_ rather than _overwrite_ previous ones.
+    the population class automatically skips models that have already been read.
+
+    this class does not support seeding.
+    although a seed file is accepted, it is not used.
+    patching is allowed, but there is normally no advantage over modifying the table.
+
+    the domain is used to define the model parameters and the parameter range.
+    models violating the parameter domain are ignored.
+    """
+
+    ## @var table_source
+    # data source of the model table
+    #
+    # this can be any object accepted by @ref pmsco.optimizers.population.Population.import_positions,
+    # e.g. a file name, a numpy structured array, or a function returning a structured array.
+    # see the class description for details.
+
+    def __init__(self):
+        """
+        initialize the population object.
+
+        the table source is initially undefined (None),
+        and the position constrain mode is set to 'error'.
+        """
+        super(TablePopulation, self).__init__()
+        self.table_source = None
+        self.position_constrain_mode = 'error'
+
+    def setup(self, size, domain, **kwargs):
+        """
+        set up the population arrays, parameter domain and data source.
+
+        @param size: requested number of particles.
+            this does not need to correspond to the number of table entries.
+            on each generation the population loads up to this number of new entries from the table source.
+
+        @param domain: definition of initial and limiting model parameters
+            expected by the cluster and parameters functions.
+            @arg domain.start: not used.
+            @arg domain.min:   minimum values allowed.
+            @arg domain.max:   maximum values allowed.
+            @arg domain.step:  not used.
+
+        the following arguments are keyword arguments.
+        the method also accepts the inherited arguments for seeding. they do not have an effect, however.
+        all keyword arguments, including table_source, are passed on to the inherited setup method.
+
+        @param table_source: data source of the model table.
+            this argument is required.
+            this can be any object accepted by @ref pmsco.optimizers.population.Population.import_positions,
+            e.g. a file name, a numpy structured array, or a function returning a structured array.
+            see the class description for details.
+
+        @return: None
+
+        @raise KeyError if the table_source argument is missing.
+        """
+        super(TablePopulation, self).setup(size, domain, **kwargs)
+        self.table_source = kwargs['table_source']
+
+    def advance_population(self):
+        """
+        advance the population by one step.
+
+        this method re-imports the table from the table source
+        and copies the table to current population.
+
+        @return: None
+        """
+        self.import_positions(self.table_source)
+        self.advance_from_import()
+        super(TablePopulation, self).advance_population()
+
+
+class TableModelHandler(population.PopulationHandler):
+    """
+    population handler for the table scan.
+
+    the class differs from the generic population handler only
+    in that it operates on a TablePopulation.
+    """
+
+    def __init__(self):
+        """
+        initialize the inherited handler and attach a new TablePopulation object.
+        """
+        super(TableModelHandler, self).__init__()
+        table = TablePopulation()
+        self._pop = table