# pmsco-public/pmsco/optimizers/genetic.py

"""
@package pmsco.optimizers.genetic
genetic optimization algorithm.

this module implements a genetic algorithm for structural optimization.

the genetic algorithm is adapted from
D. A. Duncan et al., Surface Science 606, 278 (2012)

the genetic algorithm evolves a population of individuals
by a combination of inheritance, crossover and mutation
and R-factor based selection.

@author Matthias Muntwiler, matthias.muntwiler@psi.ch

@copyright (c) 2018 by Paul Scherrer Institut @n
Licensed under the Apache License, Version 2.0 (the "License"); @n
  you may not use this file except in compliance with the License.
  You may obtain a copy of the License at
  http://www.apache.org/licenses/LICENSE-2.0
"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import logging
import numpy as np
import random
import pmsco.optimizers.population as population
from pmsco.helpers import BraceMessage as BMsg

logger = logging.getLogger(__name__)


class GeneticPopulation(population.Population):
    """
    population implementing a genetic optimization algorithm.

    the genetic algorithm implements the following principles:

    1. inheritance: two children of a new generation are generated from the genes (i.e. model parameters)
       of two parents of the old generation.
    2. elitism: individuals with similar r-factors are more likely to mate.
    3. crossover: the genes of the parents are randomly distributed to their children.
    4. mutation: a gene may mutate at random.
    5. selection: the globally best individual is added to a parent population (and replaces the worst).

    the main tuning parameter of the algorithm is the mutation_step which is copied from the model_space.step.
    it defines the width of a gaussian distribution of change under a weak mutation.
    it should be large enough so that the whole parameter space can be probed,
    but small enough that a frequent mutation does not throw the individual out of the convergence region.
    typically, the step should be of the order of the parameter range divided by the population size.

    other tunable parameters are the mating_factor, the weak_mutation_probability and the strong_mutation_probability.
    the defaults should normally be fine.
    """

    ## @var weak_mutation_probability
    #
    # probability (between 0 and 1) that a parameter changes in the mutate_weak() method.
    #
    # the default is 1.0, i.e., each parameter mutates in each generation.
    #
    # 1.0 has shown better coverage of the continuous parameter space and faster finding of the optimum.

    ## @var strong_mutation_probability
    #
    # probability (between 0 and 1) that a parameter changes in the mutate_strong() method.
    #
    # the default is 0.01, i.e., on average, every hundredth probed parameter is affected by a strong mutation.
    # if the model contains 10 parameters, for example,
    # every tenth particle would see a mutation of at least one of its parameters.
    #
    # too high value may disturb convergence,
    # too low value may trap the algorithm in a local optimum.

    ## @var mating_factor
    #
    # inverse width of the mating preference distribution.
    #
    # the greater this value, the more similar partners are mated by the mate_parents() method.
    #
    # the partner is picked from the remaining, r-factor sorted individuals
    # at a relative distance that is exponentially distributed with this rate.
    # with the default value 4.0, the probability that the best particle mates the worst
    # is about exp(-4), i.e. roughly 0.02, in a large population.

    ## @var position_constrain_mode
    #
    # the position constrain mode selects what to do if a particle violates the parameter limits.
    #
    # the default is "random" which resets the parameter to a random value.

    ## @var mutation_step
    #
    # standard deviations of the gaussian distribution function used in the mutate_weak() method.
    # the variable is a dictionary with the same keys as model_step (the parameter space).
    #
    # it is initialized from the model_space.step
    # or set to a default value based on the parameter range and population size.

    def __init__(self):
        """
        initialize the population object.

        sets the default tuning parameters of the genetic algorithm.
        mutation_step remains empty until setup() is called.
        """
        super(GeneticPopulation, self).__init__()

        self.weak_mutation_probability = 1.0
        self.strong_mutation_probability = 0.01
        self.mating_factor = 4.
        self.position_constrain_mode = 'random'
        self.mutation_step = {}

    def setup(self, size, model_space, **kwargs):
        """
        @copydoc Population.setup()

        in addition to the inherited behaviour, this method initializes self.mutation_step.
        mutation_step of a parameter is set to its model_space.step if non-zero.
        otherwise it is set to the parameter range divided by the population size.
        """
        super(GeneticPopulation, self).setup(size, model_space, **kwargs)

        for key in self.model_step:
            val = self.model_step[key]
            self.mutation_step[key] = val if val != 0 else (self.model_max[key] - self.model_min[key]) / size

    def randomize(self, pos=True, vel=True):
        """
        initializes a "random" population.

        this implementation is a new proposal.
        the distribution is not completely random.
        rather, a position vector (by parameter) is initialized with a linear function
        that covers the parameter space.
        the linear function is then permuted randomly.

        velocities are initialized in the same way
        over an interval of +/- one eighth of the parameter range.

        the method does not update the particle info fields.

        @param pos: randomize positions. if False, the positions are not changed.
        @param vel: randomize velocities. if False, the velocities are not changed.
        """
        if pos:
            for key in self.model_start:
                self.pos[key] = np.random.permutation(np.linspace(self.model_min[key], self.model_max[key],
                                                                  self.pos.shape[0]))
        if vel:
            for key in self.model_start:
                d = (self.model_max[key] - self.model_min[key]) / 8
                self.vel[key] = np.random.permutation(np.linspace(-d, d, self.vel.shape[0]))

    def advance_population(self):
        """
        advance the population by one generation.

        the population is advanced in several steps:
        1. replace the worst individual by the best found so far.
        2. mate the parents in pairs of two.
        3. produce children by crossover from the parents.
        4. apply weak mutations.
        5. apply strong mutations.
        6. mutate children that were calculated before.
        7. constrain the children to the parameter limits and store them in their parent's slot.

        if the inherited _hold_once flag is set, the generation number and the particles are not changed here.
        the inherited method is called in either case.

        if the population size is odd, one individual remains unmated (see mate_parents())
        and keeps its position.

        @return: None
        """
        if not self._hold_once:
            self.generation += 1

            # work on sorted copies so that the order of self.pos and self.best is not disturbed.
            pop = self.pos.copy()
            pop.sort(order='_rfac')
            elite = self.best.copy()
            elite.sort(order='_rfac')
            if elite[0]['_model'] not in pop['_model']:
                # the elite takes over the slot (particle index) of the worst individual.
                elite[0]['_particle'] = pop[-1]['_particle']
                pop[-1] = elite[0]
                pop.sort(order='_rfac')

            children = []
            for x, y in self.mate_parents(pop):
                children.extend(self.crossover(x, y))

            for child in children:
                # each child inherits the '_particle' field, and thus the slot, of one parent.
                index = child['_particle']
                self.mutate_weak(child, self.weak_mutation_probability)
                self.mutate_strong(child, self.strong_mutation_probability)
                self.mutate_duplicate(child)
                for key in self.model_start:
                    vel = child[key] - self.pos[index][key]
                    # constrain_position also returns the limits, which are written back.
                    # the returned velocity is not used any further in this method.
                    child[key], vel, self.model_min[key], self.model_max[key] = \
                        self.constrain_position(child[key], vel, self.model_min[key], self.model_max[key],
                                                self.position_constrain_mode)

                self.pos[index] = child
                self.update_particle_info(index)

        super(GeneticPopulation, self).advance_population()

    def mate_parents(self, positions):
        """
        group the population in pairs of two.

        to mate two individuals, the first individual of the (remaining) population selects one of the following
        with an exponential preference of earlier ones.
        the process is repeated until all individuals are mated.

        the relative distance of the partner in the remaining list
        is drawn from an exponential distribution with rate mating_factor
        and clipped to the last individual.

        if the number of individuals is odd, the last remaining individual is not mated
        and does not appear in the result.

        @param positions: original population (numpy structured array)
            the population should be ordered with best model first.
        @return: sequence of pairs (tuples) of structured arrays holding one model each.
        """
        seq = list(positions)
        parents = []
        while len(seq) >= 2:
            p1 = seq.pop(0)
            ln = len(seq)
            # expovariate(rate) has mean 1/rate.
            # the draw is a fraction of the remaining list and must be scaled by ln exactly once.
            # scaling the rate as well would push almost every draw into the clipped tail.
            i = min(int(random.expovariate(self.mating_factor) * ln), ln - 1)
            p2 = seq.pop(i)
            parents.append((p1, p2))
        return parents

    def crossover(self, parent1, parent2):
        """
        crossover two parents to create two children.

        for each model parameter, the parent's value is randomly assigned to either one of the children.
        all other fields are copied unchanged from parent1 to the first child and from parent2 to the second.

        @param parent1: numpy structured array holding the model of the first parent.
        @param parent2: numpy structured array holding the model of the second parent.
        @return: tuple of the two crossed children.
            these are two new ndarray instances that are independent of their parents.
        """
        child1 = parent1.copy()
        child2 = parent2.copy()
        for key in self.model_start:
            if random.random() >= 0.5:
                child1[key], child2[key] = parent2[key], parent1[key]
        return child1, child2

    def mutate_weak(self, model, probability):
        """
        apply a weak mutation to a model.

        each parameter is changed to a different value in the parameter space at the given probability.
        the amount of change has a gaussian distribution with a standard deviation of mutation_step.

        the result is not constrained to the parameter limits.

        @param[in,out] model: structured numpy.ndarray holding the model parameters.
            model is modified in place.

        @param probability: probability between 0 and 1 at which to change a parameter.
            0 = no change, 1 = force change.

        @return: model (same instance as the @c model input argument).
        """
        for key in self.model_start:
            if random.random() < probability:
                model[key] += random.gauss(0, self.mutation_step[key])
        return model

    def mutate_strong(self, model, probability):
        """
        apply a strong mutation to a model.

        each parameter is changed to a random value in the parameter space at the given probability.
        the new value is uniformly distributed between model_min and model_max.

        @param[in,out] model: structured numpy.ndarray holding the model parameters.
            model is modified in place.

        @param probability: probability between 0 and 1 at which to change a parameter.
            0 = no change, 1 = force change.

        @return: model (same instance as the @c model input argument).
        """
        for key in self.model_start:
            if random.random() < probability:
                model[key] = (self.model_max[key] - self.model_min[key]) * random.random() + self.model_min[key]
        return model

    def mutate_duplicate(self, model):
        """
        mutate a model if it is identical to a previously calculated one.

        if the model was calculated before, the mutate_weak mutation is applied with probability 1.
        the mutation is applied once. the result is not checked again.

        the inherited find_model method is expected to raise a ValueError if the model is not known.

        @param[in,out] model: structured numpy.ndarray holding the model parameters.
            model is modified in place.

        @return: model (same instance as the @c model input argument).
        """
        try:
            self.find_model(model)
        except ValueError:
            # model is new, nothing to do.
            pass
        else:
            # kept outside of the try block so that an error in the mutation is not mistaken for "not found".
            self.mutate_weak(model, 1.0)
        return model


class GeneticOptimizationHandler(population.PopulationHandler):
    """
    population handler that runs a genetic optimization.

    the class defines only the constructor.
    all other behaviour is inherited from population.PopulationHandler.
    the specialization consists in using a GeneticPopulation as the population object.
    """

    def __init__(self):
        """
        initialize the handler and attach a newly constructed GeneticPopulation.
        """
        base = super(GeneticOptimizationHandler, self)
        base.__init__()
        # assigned after the inherited constructor so that this population object is the one in effect.
        self._pop = GeneticPopulation()