public distro 2.1.0

2019-07-19 12:54:54 +02:00
parent acea809e4e
commit fbd2d4fa8c
40 changed files with 2813 additions and 345 deletions

@@ -344,9 +344,14 @@ class GridSearchHandler(handlers.ModelHandler):
time_pending += self._model_time
if time_pending > time_avail:
self._timeout = True
logger.warning("time limit reached")
if self._invalid_count > self._invalid_limit:
self._timeout = True
logger.error("number of invalid calculations (%u) exceeds limit", self._invalid_count)
model = self._next_model
if not self._timeout and model < self._pop.model_count and self._invalid_count < self._invalid_limit:
if not self._timeout and model < self._pop.model_count:
new_task = parent_task.copy()
new_task.parent_id = parent_id
pos = self._pop.positions[model]
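The guard added in this hunk can be read in isolation; the following is a hedged sketch with invented standalone names, not the GridSearchHandler method itself.

# sketch of the task-creation guard; names are illustrative, not the project's API
import logging

logger = logging.getLogger(__name__)

def may_schedule_next(time_pending, model_time, time_avail,
                      invalid_count, invalid_limit, model, model_count):
    """Return True if another grid point may still be scheduled."""
    if time_pending + model_time > time_avail:
        logger.warning("time limit reached")
        return False
    if invalid_count > invalid_limit:
        logger.error("number of invalid calculations (%u) exceeds limit", invalid_count)
        return False
    return model < model_count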

@@ -491,9 +491,9 @@ class Population(object):
seed = np.genfromtxt(seed_file, names=True)
try:
seed = seed[seed['_rfac'] <= rfac_limit]
except KeyError:
logger.warning(BMsg("missing _rfac column in seed file {hf}. ignoring seed file.", hf=seed_file))
return 0
except ValueError:
recalc_seed = True
logger.warning(BMsg("missing _rfac column in seed file {hf}. re-calculating.", hf=seed_file))
else:
seed.sort(order='_rfac')
seed_size = min(seed.shape[0], count_limit)
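The exception NumPy raises when a structured array is indexed with an unknown field name has differed between versions, which is presumably why this hunk swaps KeyError for ValueError. A version-independent sketch (hypothetical helper, not part of Population) tests for the column explicitly:

import numpy as np

def load_seed(seed_file, rfac_limit):
    """Read a seed file into a structured array; filter and sort by '_rfac' if present."""
    seed = np.genfromtxt(seed_file, names=True)      # one named field per column
    if seed.dtype.names and '_rfac' in seed.dtype.names:
        seed = seed[seed['_rfac'] <= rfac_limit]     # drop models above the R-factor limit
        seed.sort(order='_rfac')                     # best (lowest) R-factor first
        recalc_seed = False
    else:
        recalc_seed = True                           # no R-factor column: re-calculate everything
    return seed, recalc_seed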
@@ -508,9 +508,12 @@ class Population(object):
if len(common_fields) < len(dest_fields):
logger.warning(BMsg("missing columns in seed file {hf}.", hf=seed_file))
logger.info(BMsg("seeding population with {hs} models from file {hf}.", hs=seed_size, hf=seed_file))
logger.warning(BMsg("seeding population with {hs} models from file {hf}.", hs=seed_size, hf=seed_file))
self.pos['_rfac'][first:last] = seed['_rfac']
try:
self.pos['_rfac'][first:last] = seed['_rfac']
except ValueError:
self.pos['_rfac'][first:last] = 2.1
dest_index = np.arange(first, last)
for name in common_fields:
sel1 = np.less_equal(self.model_min[name], seed[name])
@@ -525,11 +528,11 @@ class Population(object):
self.pos['_model'][first:last] = np.arange(seed_size) + first
if recalc_seed:
self.pos['_rfac'][first:last] = 2.1
logger.info("models from seed file are re-calculated.")
logger.warning("models from seed file are re-calculated.")
else:
sel = self.pos['_rfac'][first:last] <= rfac_limit
self.pos['_gen'][dest_index[sel]] = -1
logger.info(BMsg("{0} models from seed file are not re-calculated.", np.sum(sel)))
logger.warning(BMsg("{0} models from seed file are not re-calculated.", np.sum(sel)))
return seed_size
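The np.less_equal selections above screen each seeded column against the parameter domain; a condensed, purely illustrative sketch of that idea (standalone function, invented names):

import numpy as np

def domain_mask(seed, model_min, model_max, common_fields):
    """Boolean mask of seed rows whose common parameters all lie inside [min, max]."""
    mask = np.ones(seed.shape[0], dtype=bool)
    for name in common_fields:
        mask &= np.less_equal(model_min[name], seed[name])   # value >= lower bound
        mask &= np.less_equal(seed[name], model_max[name])   # value <= upper bound
    return mask

The 2.1 written to '_rfac' above apparently serves as an 'unknown R-factor' sentinel for models that still have to be calculated.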
@@ -579,7 +582,7 @@ class Population(object):
if len(common_fields) < len(dest_fields):
logger.warning(BMsg("loaded patch file {pf}. some columns are missing.", pf=patch_file))
else:
logger.info(BMsg("loaded patch file {pf}.", pf=patch_file))
logger.warning(BMsg("loaded patch file {pf}.", pf=patch_file))
def _apply_patch(self):
"""
@@ -592,7 +595,7 @@ class Population(object):
parameter values that lie outside the parameter domain (min/max) are ignored.
"""
if self.pos_patch is not None:
logger.info(BMsg("patching the population with new positions."))
logger.warning(BMsg("patching generation {gen} with new positions.", gen=self.generation))
source_fields = set(self.pos_patch.dtype.names)
dest_fields = set(self.model_start.keys())
common_fields = source_fields & dest_fields
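A minimal sketch of the field matching used in _apply_patch: only columns present in both the patch file and the model space are applied, and, per the docstring, values outside the parameter domain are ignored. Names below are illustrative.

def applicable_fields(pos_patch, model_start):
    """Columns present in both the loaded patch array and the model parameter space."""
    source_fields = set(pos_patch.dtype.names)   # columns of the patch file
    dest_fields = set(model_start.keys())        # parameters of the model space
    return source_fields & dest_fields

def in_domain(values, lo, hi):
    """Element-wise mask of patch values inside the parameter domain [lo, hi]."""
    return (lo <= values) & (values <= hi)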
@@ -996,10 +999,10 @@ class Population(object):
# rewrite model, tolerance and results as two-dimensional array
if search_array is None:
results = self.results[names].copy()
else:
results = search_array[names].copy()
results = results.view((results.dtype[0], len(names)))
search_array = self.results
results = np.empty((search_array.shape[0], len(names)))
for col, name in enumerate(names):
results[:, col] = search_array[name]
model = np.asarray(model_tuple, results.dtype)
tol = np.asarray([max(abs(self.model_max[name]), abs(self.model_min[name]), precision)
for name in names])
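The explicit column copy that replaces the dtype view in this hunk can be reproduced generically. A sketch (invented names) of turning selected fields of a structured array into a plain two-dimensional float array:

import numpy as np

def fields_to_2d(search_array, names):
    """Copy selected fields of a structured array into an ordinary (rows, len(names)) float array."""
    results = np.empty((search_array.shape[0], len(names)))
    for col, name in enumerate(names):
        results[:, col] = search_array[name]
    return results

Unlike a .view() re-interpretation, the copy does not require the selected fields to share one dtype and be densely packed, which is presumably the motivation for the change.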
@@ -1169,8 +1172,9 @@ class PopulationHandler(handlers.ModelHandler):
"""
initialize the particle swarm and open an output file.
the population size is set to project.optimizer_params.['pop_size'] if it is defined and greater than 4.
otherwise, it defaults to <code>max(2 * slots, 4)</code>.
the population size is set to `project.optimizer_params['pop_size']`
if it is defined and greater than 4.
otherwise, it defaults to `max(slots, 4)`.
for good efficiency the population size (number of particles) should be
greater or equal to the number of available processing slots,
@@ -1191,7 +1195,9 @@ class PopulationHandler(handlers.ModelHandler):
super(PopulationHandler, self).setup(project, slots)
_min_size = 4
self._pop_size = max(project.optimizer_params.get('pop_size', self._slots * 2), _min_size)
_def_size = self._slots
_req_size = project.optimizer_params.get('pop_size', 0)
self._pop_size = _req_size if _req_size >= _min_size else _def_size
self.setup_population()
self._invalid_limit = self._pop_size * 10
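A sketch of the revised population-size rule as the docstring describes it (standalone function, illustrative only; the max(slots, 4) fallback follows the docstring's wording):

def choose_pop_size(optimizer_params, slots, min_size=4):
    """Use the requested 'pop_size' if it is at least min_size, else fall back to the slot count."""
    requested = optimizer_params.get('pop_size', 0)
    return requested if requested >= min_size else max(slots, min_size)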
@@ -1228,7 +1234,11 @@ class PopulationHandler(handlers.ModelHandler):
because the best peer position in the generation may not be known yet.
the effect can be reduced by making the population larger than the number of processes.
@return list of generated tasks. empty list if the optimization has converged (see Population.is_converged()).
the created tasks are returned as the function result and added to self._pending_tasks.
@return list of generated tasks.
empty list if the optimization has converged (see Population.is_converged())
or if the time limit is approaching.
"""
super(PopulationHandler, self).create_tasks(parent_task)
@@ -1241,7 +1251,7 @@ class PopulationHandler(handlers.ModelHandler):
time_pending = self._model_time * len(self._pending_tasks)
time_avail = (self.datetime_limit - datetime.datetime.now()) * max(self._slots, 1)
out_tasks = []
new_tasks = []
if not self._timeout and not self._converged:
self._check_patch_file()
self._pop.advance_population()
@@ -1250,7 +1260,8 @@
time_pending += self._model_time
if time_pending > time_avail:
self._timeout = True
logger.info("time limit reached")
logger.warning("time limit reached")
new_tasks = []
break
if pos['_gen'] >= 0:
@@ -1258,12 +1269,12 @@
new_task.parent_id = parent_id
new_task.model = pos
new_task.change_id(model=pos['_model'])
new_tasks.append(new_task)
child_id = new_task.id
self._pending_tasks[child_id] = new_task
out_tasks.append(new_task)
for task in new_tasks:
self._pending_tasks[task.id] = task
return out_tasks
return new_tasks
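The time-budget test inside the loop above compares the projected cost of all pending models with the wall-clock time remaining, scaled by the number of processing slots. As an isolated sketch (invented names; model_time and datetime_limit are assumed to be datetime.timedelta and datetime.datetime values as in the hunk):

import datetime

def fits_time_budget(datetime_limit, model_time, pending_count, slots):
    """True if one more task of duration model_time still fits before datetime_limit."""
    time_avail = (datetime_limit - datetime.datetime.now()) * max(slots, 1)
    time_pending = model_time * (pending_count + 1)   # pending tasks plus the one about to be created
    return time_pending <= time_avail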
def _check_patch_file(self):
"""
@@ -1323,7 +1334,7 @@
if task.result_valid:
if self._pop.is_converged() and not self._converged:
logger.info("population converged")
logger.warning("population converged")
self._converged = True
if task.time > self._model_time: