public release 2.2.0 - see README.md and CHANGES.md for details

2020-09-04 16:22:42 +02:00
parent fbd2d4fa8c
commit 7c61eb1b41
67 changed files with 2934 additions and 682 deletions
--- a/pmsco/optimizers/genetic.py
+++ b/pmsco/optimizers/genetic.py
@ -45,7 +45,7 @@ class GeneticPopulation(population.Population):
    4. mutation: a gene may mutate at random.
    5. selection: the globally best individual is added to a parent population (and replaces the worst).

-    the main tuning parameter of the algorithm is the mutation_step which is copied from the domain.step.
+    the main tuning parameter of the algorithm is the mutation_step which is copied from the model_space.step.
    it defines the width of a gaussian distribution of change under a weak mutation.
    it should be large enough so that the whole parameter space can be probed,
    but small enough that a frequent mutation does not throw the individual out of the convergence region.
@ -92,9 +92,9 @@ class GeneticPopulation(population.Population):
    ## @var mutation_step
    #
    # standard deviations of the exponential distribution function used in the mutate_weak() method.
-    # the variable is a dictionary with the same keys as model_step (the parameter domain).
+    # the variable is a dictionary with the same keys as model_step (the parameter space).
    #
-    # it is initialized from the domain.step
+    # it is initialized from the model_space.step
    # or set to a default value based on the parameter range and population size.

    def __init__(self):
@ -110,15 +110,15 @@ class GeneticPopulation(population.Population):
        self.position_constrain_mode = 'random'
        self.mutation_step = {}

-    def setup(self, size, domain, **kwargs):
+    def setup(self, size, model_space, **kwargs):
        """
        @copydoc Population.setup()

        in addition to the inherited behaviour, this method initializes self.mutation_step.
-        mutation_step of a parameter is set to its domain.step if non-zero.
+        mutation_step of a parameter is set to its model_space.step if non-zero.
        otherwise it is set to the parameter range divided by the population size.
        """
-        super(GeneticPopulation, self).setup(size, domain, **kwargs)
+        super(GeneticPopulation, self).setup(size, model_space, **kwargs)

        for key in self.model_step:
            val = self.model_step[key]
@ -131,7 +131,7 @@ class GeneticPopulation(population.Population):
        this implementation is a new proposal.
        the distribution is not completely random.
        rather, a position vector (by parameter) is initialized with a linear function
-        that covers the parameter domain.
+        that covers the parameter space.
        the linear function is then permuted randomly.

        the method does not update the particle info fields.
@ -243,7 +243,7 @@ class GeneticPopulation(population.Population):
        """
        apply a weak mutation to a model.

-        each parameter is changed to a different value in the domain of the parameter at the given probability.
+        each parameter is changed to a different value in the parameter space at the given probability.
        the amount of change has a gaussian distribution with a standard deviation of mutation_step.

        @param[in,out] model: structured numpy.ndarray holding the model parameters.
@ -263,7 +263,7 @@ class GeneticPopulation(population.Population):
        """
        apply a strong mutation to a model.

-        each parameter is changed to a random value in the domain of the parameter at the given probability.
+        each parameter is changed to a random value in the parameter space at the given probability.

        @param[in,out] model: structured numpy.ndarray holding the model parameters.
            model is modified in place.
--- a/pmsco/optimizers/gradient.py
+++ b/pmsco/optimizers/gradient.py
@ -8,7 +8,7 @@ the optimization task is distributed over multiple processes using MPI.
 the optimization must be started with N+1 processes in the MPI environment,
 where N equals the number of fit parameters.

-IMPLEMENTATION IN PROGRESS - DEBUGGING
+THIS MODULE IS NOT INTEGRATED INTO PMSCO YET.

 Requires: scipy, numpy

@ -109,7 +109,7 @@ class MscMaster(MscProcess):

    def setup(self, project):
        super(MscMaster, self).setup(project)
-        self.dom = project.create_domain()
+        self.dom = project.create_model_space()
        self.running_slaves = self.slaves

        self._outfile = open(self.project.output_file + ".dat", "w")
--- a/pmsco/optimizers/grid.py
+++ b/pmsco/optimizers/grid.py
@ -63,7 +63,7 @@ class GridPopulation(object):
    ## @var positions
    # (numpy.ndarray) flat list of grid coordinates and results.
    #
-    # the column names include the names of the model parameters, taken from domain.start,
+    # the column names include the names of the model parameters, taken from model_space.start,
    # and the special names @c '_model', @c '_rfac'.
    # the special fields have the following meanings:
    #
@ -113,11 +113,12 @@ class GridPopulation(object):
        dt.sort(key=lambda t: t[0].lower())
        return dt

-    def setup(self, domain):
+    def setup(self, model_space):
        """
        set up the population and result arrays.

-        @param domain: definition of initial and limiting model parameters
+        @param model_space: (pmsco.project.ModelSpace)
+            definition of initial and limiting model parameters
            expected by the cluster and parameters functions.
            the attributes have the following meanings:
            @arg start: values of the fixed parameters.
@ -128,24 +129,24 @@ class GridPopulation(object):
                        if step <= 0, the parameter is kept constant.

        """
-        self.model_start = domain.start
-        self.model_min = domain.min
-        self.model_max = domain.max
-        self.model_step = domain.step
+        self.model_start = model_space.start
+        self.model_min = model_space.min
+        self.model_max = model_space.max
+        self.model_step = model_space.step

        self.model_count = 1
        self.search_keys = []
        self.fixed_keys = []
        scales = []

-        for p in domain.step.keys():
-            if domain.step[p] > 0:
-                n = np.round((domain.max[p] - domain.min[p]) / domain.step[p]) + 1
+        for p in model_space.step.keys():
+            if model_space.step[p] > 0:
+                n = int(np.round((model_space.max[p] - model_space.min[p]) / model_space.step[p]) + 1)
            else:
                n = 1
            if n > 1:
                self.search_keys.append(p)
-                scales.append(np.linspace(domain.min[p], domain.max[p], n))
+                scales.append(np.linspace(model_space.min[p], model_space.max[p], n))
            else:
                self.fixed_keys.append(p)

@ -221,7 +222,7 @@ class GridPopulation(object):

        @raise AssertionError if the number of rows of the two files differ.
        """
-        data = np.genfromtxt(filename, names=True)
+        data = np.atleast_1d(np.genfromtxt(filename, names=True))
        assert data.shape == array.shape
        for name in data.dtype.names:
            array[name] = data[name]
@ -298,12 +299,12 @@ class GridSearchHandler(handlers.ModelHandler):
            the minimum number of slots is 1, the recommended value is 10 or greater.
            the population size is set to at least 4.

-        @return:
+        @return (int) number of models to be calculated.
        """
        super(GridSearchHandler, self).setup(project, slots)

        self._pop = GridPopulation()
-        self._pop.setup(self._project.create_domain())
+        self._pop.setup(self._project.create_model_space())
        self._invalid_limit = max(slots, self._invalid_limit)

        self._outfile = open(self._project.output_file + ".dat", "w")
@ -311,7 +312,7 @@ class GridSearchHandler(handlers.ModelHandler):
        self._outfile.write(" ".join(self._pop.positions.dtype.names))
        self._outfile.write("\n")

-        return None
+        return self._pop.model_count

    def cleanup(self):
        self._outfile.close()
--- a/pmsco/optimizers/population.py
+++ b/pmsco/optimizers/population.py
@ -3,7 +3,7 @@
 base classes for population-based optimizers.

 a _population_ is a set of individuals or particles
-that can assume coordinates from the parameter domain.
+that can assume coordinates from the parameter space.
 a tuple of coordinates is also called _model parameters_ which define the _model_.
 the individuals travel through parameter space according to an algorithm defined separately.
 depending on the algorithm, the population can converge towards the optimum coordinates based on calculated R-factors.
@ -117,7 +117,7 @@ class Population(object):
    ## @var pos
    # (numpy.ndarray) current positions of each particle.
    #
-    # the column names include the names of the model parameters, taken from domain.start,
+    # the column names include the names of the model parameters, taken from model_space.start,
    # and the special names @c '_particle', @c '_model', @c '_rfac'.
    # the special fields have the following meanings:
    #
@ -299,7 +299,7 @@ class Population(object):
            arr[k] = model_dict[k]
        return arr

-    def setup(self, size, domain, **kwargs):
+    def setup(self, size, model_space, **kwargs):
        """
        set up the population arrays seeded with previous results and the start model.

@ -315,12 +315,12 @@ class Population(object):

        @param size: requested number of particles.

-        @param domain: definition of initial and limiting model parameters
+        @param model_space: definition of initial and limiting model parameters
            expected by the cluster and parameters functions.
-            @arg domain.start: initial guess.
-            @arg domain.min:   minimum values allowed.
-            @arg domain.max:   maximum values allowed. if min == max, the parameter is kept constant.
-            @arg domain.step:  depends on the actual algorithm.
+            @arg model_space.start: initial guess.
+            @arg model_space.min:   minimum values allowed.
+            @arg model_space.max:   maximum values allowed. if min == max, the parameter is kept constant.
+            @arg model_space.step:  depends on the actual algorithm.
                not used in particle swarm.
                standard deviation of mutations in genetic optimization.

@ -335,14 +335,14 @@ class Population(object):
        """
        self.size_req = size
        self.size_act = size
-        self.model_start = domain.start
-        self.model_min = domain.min
-        self.model_max = domain.max
-        self.model_step = domain.step
-        self.model_start_array = self.get_model_array(domain.start)
-        self.model_min_array = self.get_model_array(domain.min)
-        self.model_max_array = self.get_model_array(domain.max)
-        self.model_step_array = self.get_model_array(domain.step)
+        self.model_start = model_space.start
+        self.model_min = model_space.min
+        self.model_max = model_space.max
+        self.model_step = model_space.step
+        self.model_start_array = self.get_model_array(model_space.start)
+        self.model_min_array = self.get_model_array(model_space.min)
+        self.model_max_array = self.get_model_array(model_space.max)
+        self.model_step_array = self.get_model_array(model_space.step)

        # allocate arrays
        dt = self.get_pop_dtype(self.model_start)
@ -378,8 +378,8 @@ class Population(object):
        """
        initializes a random population.

-        the position array is filled with random values (uniform distribution) from the parameter domain.
-        velocity values are randomly chosen between -1/8 to 1/8 times the width (max - min) of the parameter domain.
+        the position array is filled with random values (uniform distribution) from the parameter space.
+        velocity values are randomly chosen between -1/8 to 1/8 times the width (max - min) of the parameter space.

        the method does not update the particle info fields.

@ -402,8 +402,8 @@ class Population(object):
        the method does not update the particle info fields.

        @param params: dictionary of model parameters.
-            the keys must match the ones of domain.start.
-            values that lie outside of the domain are skipped.
+            the keys must match the ones of model_space.start.
+            values that lie outside of the model space are skipped.

        @param index: index of the particle that is seeded.
            the index must be in the allowed range of the self.pos array.
@ -440,7 +440,7 @@ class Population(object):
        this method is called as a part of setup().
        it must not be called after the optimization has started.

-        parameter values that lie outside the parameter domain (min/max) are left at their previous value.
+        parameter values that lie outside the model space (min/max) are left at their previous value.

        @note this method does not initialize the remaining particles.
            neither does it set the velocity and best position arrays of the seeded particles.
@ -488,7 +488,7 @@ class Population(object):
            count_limit = self.pos.shape[0]
        count_limit = min(count_limit, self.pos.shape[0] - first_particle)

-        seed = np.genfromtxt(seed_file, names=True)
+        seed = np.atleast_1d(np.genfromtxt(seed_file, names=True))
        try:
            seed = seed[seed['_rfac'] <= rfac_limit]
        except ValueError:
@ -561,7 +561,7 @@ class Population(object):
        this method does not handle exceptions coming from numpy.genfromtxt
        such as missing file (IOError) or conversion errors (ValueError).
        exception handling should be done by the owner of the population (typically the model handler).
-        patch values that lie outside the population domain aresilently ignored.
+        patch values that lie outside the model space are silently ignored.

        @param patch_file: path and name of the patch file.
            the file must have the correct format for load_array(),
@ -572,7 +572,7 @@ class Population(object):

        @raise ValueError for conversion errors.
        """
-        self.pos_patch = np.genfromtxt(patch_file, names=True)
+        self.pos_patch = np.atleast_1d(np.genfromtxt(patch_file, names=True))
        source_fields = set(self.pos_patch.dtype.names)
        dest_fields = set(self.model_start.keys())
        common_fields = source_fields & dest_fields
@ -592,7 +592,7 @@ class Population(object):

        the method overwrites only parameter values, not control variables.
        _particle indices that lie outside the range of available population items are ignored.
-        parameter values that lie outside the parameter domain (min/max) are ignored.
+        parameter values that lie outside the model space (min/max) are ignored.
        """
        if self.pos_patch is not None:
            logger.warning(BMsg("patching generation {gen} with new positions.", gen=self.generation))
@ -658,7 +658,7 @@ class Population(object):
        elif isinstance(source, str):
            for i in range(timeout):
                try:
-                    array = np.genfromtxt(source, names=True)
+                    array = np.atleast_1d(np.genfromtxt(source, names=True))
                except IOError:
                    time.sleep(1)
                else:
@ -708,7 +708,7 @@ class Population(object):

        the method also performs a range check.
        the parameter values are constrained according to self.position_constrain_mode
-        and the parameter domain self.model_min and self.model_max.
+        and the model space self.model_min and self.model_max.
        if the constrain mode is `error`, models that violate the constraints are ignored
        and removed from the import queue.

@ -844,18 +844,18 @@ class Population(object):
        """
        constrain a position to the given bounds.

-        this method resolves violations of parameter boundaries, i.e. when a particle is leaving the designated domain.
-        if a violation is detected, the method calculates an updated position inside the domain
+        this method resolves violations of parameter boundaries, i.e. when a particle is leaving the designated model space.
+        if a violation is detected, the method calculates an updated position inside the model space
        according to the selected algorithm.
        in some cases the velocity or boundaries have to be updated as well.

        the method distinguishes overshoot and undershoot violations.
-        overshoot is the normal case when the particle is leaving the domain.
+        overshoot is the normal case when the particle is leaving the model space.
        it is handled according to the selected algorithm.

        undershoot is a special case where the particle was outside the boundaries before the move.
        this case can occur in the beginning if the population is seeded with out-of-bounds values.
-        undershoot is always handled by placing the particle at a random position in the domain
+        undershoot is always handled by placing the particle at a random position in the model space
        regardless of the chosen constraint mode.

        @note it is important to avoid bias while handling constraint violations.
@ -877,7 +877,7 @@ class Population(object):

        @param _mode: what to do if a boundary constraint is violated:
            @arg 're-enter': re-enter from the opposite side of the parameter interval.
-            @arg 'bounce': fold the motion vector at the boundary and move the particle back into the domain.
+            @arg 'bounce': fold the motion vector at the boundary and move the particle back into the model space.
            @arg 'scatter': place the particle at a random place between its old position and the violated boundary.
            @arg 'stick': place the particle at the violated boundary.
            @arg 'expand': move the boundary so that the particle fits.
@ -982,7 +982,7 @@ class Population(object):
        @param search_array: population-like numpy structured array to search for the model.
            defaults to self.results if None.

-        @param precision: precision relative to model domain at which elements should be considered equal.
+        @param precision: precision relative to model space at which elements should be considered equal.

        @return index of the first occurrence.

@ -1071,7 +1071,7 @@ class Population(object):

        @raise AssertionError if the number of rows of the two files differ.
        """
-        data = np.genfromtxt(filename, names=True)
+        data = np.atleast_1d(np.genfromtxt(filename, names=True))
        assert data.shape == array.shape
        for name in data.dtype.names:
            array[name] = data[name]
@ -1182,7 +1182,7 @@ class PopulationHandler(handlers.ModelHandler):
        which may slow down convergence.

        if calculations take a long time compared to the available computation time
-        or spawn a lot of sub-tasks due to complex symmetry,
+        or spawn a lot of sub-tasks due to complex model space,
        and you prefer to allow for a good number of generations,
        you should override the population size.

@ -1190,7 +1190,7 @@ class PopulationHandler(handlers.ModelHandler):

        @param slots: number of calculation processes available through MPI.

-        @return: None
+        @return (int) population size
        """
        super(PopulationHandler, self).setup(project, slots)

@ -1206,10 +1206,10 @@ class PopulationHandler(handlers.ModelHandler):
            outfile.write(" ".join(self._pop.results.dtype.names))
            outfile.write("\n")

-        return None
+        return self._pop_size

    def setup_population(self):
-        self._pop.setup(self._pop_size, self._project.create_domain(), **self._project.optimizer_params)
+        self._pop.setup(self._pop_size, self._project.create_model_space(), **self._project.optimizer_params)

    def cleanup(self):
        super(PopulationHandler, self).cleanup()
@ -1235,7 +1235,7 @@ class PopulationHandler(handlers.ModelHandler):
        the effect can be reduced by making the population larger than the number of processes.

        the created tasks are returned as the function result and added to self._pending_tasks.
-
+        
        @return list of generated tasks.
            empty list if the optimization has converged (see Population.is_converged())
            or if the time limit is approaching.
--- a/pmsco/optimizers/table.py
+++ b/pmsco/optimizers/table.py
@ -83,8 +83,8 @@ class TablePopulation(population.Population):
    although, a seed file is accepted, it is not used.
    patching is allowed, but there is normally no advantage over modifying the table.

-    the domain is used to define the model parameters and the parameter range.
-    models violating the parameter domain are ignored.
+    the model space is used to define the model parameters and the parameter range.
+    models violating the model space are ignored.
    """

    ## @var table_source
@ -103,20 +103,20 @@ class TablePopulation(population.Population):
        self.table_source = None
        self.position_constrain_mode = 'error'

-    def setup(self, size, domain, **kwargs):
+    def setup(self, size, model_space, **kwargs):
        """
-        set up the population arrays, parameter domain and data source.
+        set up the population arrays, model space and data source.

        @param size: requested number of particles.
            this does not need to correspond to the number of table entries.
            on each generation the population loads up to this number of new entries from the table source.

-        @param domain: definition of initial and limiting model parameters
+        @param model_space: definition of initial and limiting model parameters
            expected by the cluster and parameters functions.
-            @arg domain.start: not used.
-            @arg domain.min:   minimum values allowed.
-            @arg domain.max:   maximum values allowed.
-            @arg domain.step:  not used.
+            @arg model_space.start: not used.
+            @arg model_space.min:   minimum values allowed.
+            @arg model_space.max:   maximum values allowed.
+            @arg model_space.step:  not used.

        the following arguments are keyword arguments.
        the method also accepts the inherited arguments for seeding. they do not have an effect, however.
@ -128,7 +128,7 @@ class TablePopulation(population.Population):

        @return: None
        """
-        super(TablePopulation, self).setup(size, domain, **kwargs)
+        super(TablePopulation, self).setup(size, model_space, **kwargs)
        self.table_source = kwargs['table_source']

    def advance_population(self):