368 lines
16 KiB
Python
Executable File
368 lines
16 KiB
Python
Executable File
#!/usr/bin/env python
|
|
"""
|
|
@package pmsco.reports.genetic
|
|
graphics rendering module for population genetics.
|
|
|
|
the module can be used in several different ways:
|
|
|
|
1. via the command line on a pmsco database or .dat results file.
|
|
this is the most simple but least flexible way.
|
|
2. via python functions on given population arrays or database queries.
|
|
this is the most flexible way but requires understanding of the required data formats.
|
|
3. as a listener on calculation events. (to be implemented)
|
|
this will be configurable in the run file.
|
|
|
|
@author Matthias Muntwiler, matthias.muntwiler@psi.ch
|
|
|
|
@copyright (c) 2021 by Paul Scherrer Institut @n
|
|
Licensed under the Apache License, Version 2.0 (the "License"); @n
|
|
you may not use this file except in compliance with the License.
|
|
You may obtain a copy of the License at
|
|
http://www.apache.org/licenses/LICENSE-2.0
|
|
"""
|
|
|
|
import argparse
|
|
import logging
|
|
import numpy as np
|
|
from pathlib import Path
|
|
import sys
|
|
|
|
if __name__ == "__main__":
    # when the file is executed as a script, put the directory three levels above this file
    # at the front of sys.path so that the absolute `pmsco.*` imports below can be resolved.
    # NOTE(review): presumably this is the directory that contains the pmsco package - confirm.
    pmsco_root = Path(__file__).resolve().parent.parent.parent
    if str(pmsco_root) not in sys.path:
        sys.path.insert(0, str(pmsco_root))
|
|
|
|
import pmsco.reports.results as rp_results
|
|
import pmsco.database.util as db_util
|
|
import pmsco.database.query as db_query
|
|
from pmsco.reports.base import ProjectReport
|
|
from pmsco.reports.population import GenerationTracker
|
|
|
|
# module-level logger named after this module.
logger = logging.getLogger(__name__)

# matplotlib is treated as an optional dependency:
# if the import fails, the three names are bound to None so that the module can still be imported.
# plot_genetic() checks for None before drawing.
try:
    from matplotlib.figure import Figure
    from matplotlib.ticker import MaxNLocator
    from matplotlib.backends.backend_agg import FigureCanvasAgg as FigureCanvas
    # alternative canvases that callers may pass in explicitly:
    # from matplotlib.backends.backend_pdf import FigureCanvasPdf
    # from matplotlib.backends.backend_svg import FigureCanvasSVG
except ImportError:
    Figure = None
    FigureCanvas = None
    MaxNLocator = None
    logger.warning("error importing matplotlib. graphics rendering disabled.")
|
|
|
|
|
|
def plot_genetic(filename, rpos2d, param_labels, title=None, cmap=None, canvas=None):
    """
    draw one genetic chart from prepared data and save it to a graphics file.

    a genetic chart is a pseudo-colour image of the coordinates of each individual in the model space.
    it is meant to show how diverse a population is and,
    when charts of several generations are compared, how the population changes due to mutation.
    the x-axis is the model parameter, the y-axis is the particle number.
    the colour encodes the relative position of a parameter value within its parameter range.

    this function contains the drawing code only.
    the input must already be in its final form; it is not checked, converted or processed here.

    the graphics file format is determined by the canvas class. the default canvas produces PNG.

    the function needs matplotlib.
    if matplotlib could not be imported (or no canvas class is available),
    nothing is drawn and None is returned.

    @param filename: path and name of the output file without extension.
        the extension is taken from the default file type of the canvas.
    @param rpos2d: (two-dimensional numpy array of numeric type)
        relative positions of the particles in the model space.
        dimension 0 (y-axis) is the particle index,
        dimension 1 (x-axis) is the parameter index (in the order given by param_labels).
        values are expected to lie between 0 and 1; the colour scale is fixed to that interval.
    @param param_labels: (sequence) list or tuple of parameter names, used as x tick labels.
    @param title: (str) chart title. default is 'genetic chart'.
    @param cmap: (str) name of a colour map supported by matplotlib.
        default is 'jet'.
        other good-looking options are 'PiYG', 'RdBu', 'RdYlGn', 'coolwarm'.
    @param canvas: a FigureCanvas class reference from a matplotlib backend.
        if None, the default FigureCanvasAgg is used which produces a bitmap file in PNG format.
        some other options are:
        matplotlib.backends.backend_pdf.FigureCanvasPdf or
        matplotlib.backends.backend_svg.FigureCanvasSVG.

    @return (str) path and name of the generated graphics file.
        None if no file was generated:
        matplotlib is not available, or an OSError occurred while saving (the error is logged).
    """
    canvas_class = FigureCanvas if canvas is None else canvas
    if Figure is None or canvas_class is None:
        # matplotlib is missing: rendering is disabled.
        return None
    colour_map = 'jet' if cmap is None else cmap
    chart_title = 'genetic chart' if title is None else title

    figure = Figure()
    # creating the canvas instance binds the chosen backend to the figure.
    canvas_class(figure)
    axes = figure.add_subplot(111)

    image = axes.imshow(rpos2d, aspect='auto', cmap=colour_map, origin='lower')
    # fixed colour range so that charts of different generations are comparable.
    image.set_clim((0.0, 1.0))

    # one tick per parameter, labelled with the parameter name.
    tick_positions = np.arange(len(param_labels))
    axes.set_xticks(tick_positions)
    axes.set_xticklabels(param_labels, rotation=45, ha="right", rotation_mode="anchor")
    # particle numbers are integers.
    axes.yaxis.set_major_locator(MaxNLocator(integer=True))
    axes.set_ylabel('particle')
    axes.set_title(chart_title)

    colour_bar = axes.figure.colorbar(image, ax=axes)
    colour_bar.ax.set_ylabel("relative value", rotation=-90, va="bottom")

    extension = canvas_class.get_default_filetype()
    target = "{base}.{ext}".format(base=filename, ext=extension)
    try:
        figure.savefig(target)
    except OSError:
        logger.exception(f"exception while saving figure {target}")
        return None

    return target
|
|
|
|
|
|
class GeneticPlot(ProjectReport, GenerationTracker):
    """
    produce two-dimensional genetic population charts

    this class collects and validates all parameters and data for generating a series of genetic charts.
    it iterates over generations and calls plot_genetic() for each.

    a genetic chart is a pseudo-colour representation of the coordinates of each individual in the model space.
    the axes are the particle number and the model parameter.
    the colour is mapped from the relative position of a parameter value within the parameter range.

    the chart should illustrate the diversity in the population.
    converged parameters will show similar colours.
    by comparing charts of different generations, the effect of the optimization algorithm can be examined.
    though the chart type is designed for the genetic algorithm, it may be useful for other algorithms as well.

    the graphics file format can be changed by providing a specific canvas. default is PNG.

    attributes set by this class:
    - result_data: (pmsco.reports.results.ResultData) the data to plot.
    - filename_format: template of the output file name (without extension).
      ${base} and ${gen} are replaced by the base file name and the generation number.
    - title_format: template of the chart title. same placeholders as filename_format.
    - cmap: name of the matplotlib colour map, or None for the default of plot_genetic().

    other attributes used here (canvas, report_dir, base_filename, _project)
    are not defined in this class; they are expected to come from the base classes.
    """

    def __init__(self):
        super().__init__()
        self._modes = ['genetic', 'swarm']
        self.result_data = rp_results.ResultData()
        self.filename_format = "${base}-genetic-${gen}"
        self.title_format = "generation ${gen}"
        self.cmap = None
        self.params = None

    def select_data(self, jobs=-1, calcs=None):
        """
        query data from the database

        the method loads the results of those generations
        that changed_generations() reports for the selected jobs into self.result_data.
        if a project is attached, its model space is applied to the result data.

        @param jobs: filter by job.
            the argument can be a singleton or sequence of orm.Job objects or numeric id.
            if None, results from all jobs are loaded.
            if -1 (default), results from the most recent job (by datetime field) are loaded.

        @param calcs: the calcs argument is ignored.

        @return: None
        """

        with self.get_session() as session:
            if jobs == -1:
                jobs = db_query.query_newest_job(session)
            changed_gens = self.changed_generations(session, jobs)
            self.result_data.reset_filters()
            self.result_data.generations = changed_gens
            # NOTE(review): level filter as used elsewhere in this module;
            # the exact meaning of -1 is defined by ResultData - confirm there.
            self.result_data.levels = {'scan': -1}
            self.result_data.load_from_db(session, jobs=jobs)
            if self._project:
                self.result_data.set_model_space(self._project.model_space)

    def create_report(self):
        """
        generate the plots based on the stored attributes.

        this method loops over the generations in self.result_data,
        converts the parameter values of each generation to relative positions in the model space,
        and calls plot_genetic for each generation.

        the result data must contain at least 2 parameters, 1 generation and 1 particle.
        otherwise, a warning is logged and no plot is produced.

        @return: list of created files.
            empty if the data is not suitable or no file could be written.
        """
        # check that result data is compatible with genetic plots
        if self.result_data.params is None or len(self.result_data.params) < 2:
            logger.warning("result data must contain at least 2 parameters")
            return []
        if self.result_data.generations is None or len(self.result_data.generations) < 1:
            logger.warning("result data must specify at least 1 generation")
            return []
        # the threshold matches the warning message and the pattern of the checks above.
        if self.result_data.particles is None or len(self.result_data.particles) < 1:
            logger.warning("result data must specify at least 1 particle")
            return []

        vmin = self.result_data.model_space.min
        vmax = self.result_data.model_space.max
        # case-insensitive alphabetical order of the parameters along the x-axis.
        pnames = sorted(self.result_data.non_degenerate_params(), key=str.lower)

        # pass only the options that are set so that plot_genetic applies its own defaults.
        kwargs = {}
        if self.cmap is not None:
            kwargs['cmap'] = self.cmap
        if self.canvas is not None:
            kwargs['canvas'] = self.canvas

        files = []
        fdict = {'base': self.base_filename}

        for rd in self.result_data.iterate_generations():
            fdict['gen'] = int(rd.generations[0])
            filename = Path(self.report_dir, self.filename_format)
            filename = Path(self.resolve_template(filename, fdict))
            kwargs['title'] = self.resolve_template(self.title_format, fdict)

            # rows of the chart are ordered by particle number.
            sorted_values = np.sort(rd.values, order='_particle')
            values_2d = np.zeros((sorted_values.shape[0], len(pnames)))
            for index, pname in enumerate(pnames):
                # relative position of the value within the parameter range.
                # NOTE(review): assumes vmax != vmin for all plotted parameters,
                # presumably guaranteed by non_degenerate_params() - confirm.
                values_2d[:, index] = (sorted_values[pname] - vmin[pname]) / (vmax[pname] - vmin[pname])

            of = plot_genetic(filename, values_2d, pnames, **kwargs)
            if of:
                files.append(of)

        return files
|
|
|
|
|
|
def render_genetic(output_file, values, model_space=None, generations=None, title=None, cmap=None,
                   canvas=None):
    """
    produce a genetic chart from a given population.

    a genetic chart is a pseudo-colour representation of the coordinates of each individual in the model space.
    the axes are the particle number and the model parameter.
    the colour is mapped from the relative position of a parameter value within the parameter range.

    the chart should illustrate the diversity in the population.
    converged parameters will show similar colours.
    by comparing charts of different generations, the effect of the optimization algorithm can be examined.
    though the chart type is designed for the genetic algorithm, it may be useful for other algorithms as well.

    the function requires input in one of the following forms:
    - a result (.dat) file or
      numpy structured array.
      the array must contain regular parameters, as well as the _particle and _gen columns.
      other columns are ignored.
      the function generates one chart per generation unless the generations argument is specified.
    - a file (file name or file object) or numpy structured array.
      the array must be wrapped in a sequence (tuple or list) for compatibility with other functions.
      the array must essentially be in the same format as the corresponding member of the Population class.
      the array must contain regular parameters, as well as the _particle columns.
      files are loaded by numpy.genfromtxt.
    - a pmsco.optimizers.population.Population object with valid data.
      the generation is taken from the respective attribute and overrides the function argument.
    - an open pmsco database session. the most recent job results are loaded.

    the input is loaded by pmsco.reports.results.ResultData.load_any;
    the list above is the documented contract of that loader and is not checked in this function.

    the graphics file format can be changed by providing a specific canvas. default is PNG.

    this function requires the matplotlib module.
    NOTE(review): in this module, plot_genetic returns None rather than raising if matplotlib is missing,
    in which case the returned list is empty.

    @param output_file: path and base name of the output file without extension.
        a generation index and the file extension according to the file format are appended.
    @param values: a numpy structured ndarray of a population or result list from an optimization run.
        alternatively, the file path of a result file (.dat) or population file (.pop) can be given.
        file can be any object that numpy.genfromtxt() can handle.
        array or file must be wrapped in a sequence.
    @param model_space: model space can be a pmsco.project.ModelSpace object,
        any object that contains the same min and max attributes as pmsco.project.ModelSpace,
        or a dictionary with two keys 'min' and 'max' that provides the corresponding ModelSpace dictionaries.
        by default, the model space boundaries are derived from the input data.
        if a model_space is specified, only the parameters listed in it are plotted.
    @param generations: (int or sequence) generation index or list of indices.
        a single index may be a python int or a numpy integer; it is wrapped into a tuple.
        this index is used in the output file name and for filtering input data by generation.
        if the input data does not contain the generation, no filtering is applied.
        by default, no filtering is applied, and one graph for each generation is produced.
    @param title: (str) title of the chart.
        the title is a template string that is resolved like GeneticPlot.title_format,
        where ${gen} is replaced by the generation number.
        default: "${gen}", i.e. the generation number only.
    @param cmap: (str) name of colour map supported by matplotlib.
        default is 'jet'.
        other good-looking options are 'PiYG', 'RdBu', 'RdYlGn', 'coolwarm'.
    @param canvas: a FigureCanvas class reference from a matplotlib backend.
        if None, the default FigureCanvasAgg is used which produces a bitmap file in PNG format.
        some other options are:
        matplotlib.backends.backend_pdf.FigureCanvasPdf or
        matplotlib.backends.backend_svg.FigureCanvasSVG.

    @return (list of str) paths of the generated graphics files.
        empty if an error occurred.
    """

    data = rp_results.ResultData()
    # accept numpy integer scalars as well as python ints for a single generation.
    if isinstance(generations, (int, np.integer)):
        generations = (generations,)
    # the filters must be set before the data is loaded.
    data.generations = generations
    data.levels = {'scan': -1}
    data.load_any(values)
    if model_space is not None:
        data.set_model_space(model_space)

    plot = GeneticPlot()
    plot.canvas = canvas
    plot.cmap = cmap
    if title:
        plot.title_format = title
    else:
        plot.title_format = "${gen}"
    # split the requested output path into the report directory and the file name template.
    output_path = Path(output_file)
    plot.report_dir = output_path.parent
    plot.filename_format = output_path.name + "-${gen}"
    # there is no project object in this stand-alone use case.
    plot.validate(None)
    plot.result_data = data
    files = plot.create_report()

    return files
|
|
|
|
|
|
def main():
    """
    command line interface: render genetic charts from a results file or database file.

    the function parses the command line (results_file, output_file, optional --title)
    and calls render_genetic.
    if the results file is an sqlite3 database, a database session is opened and passed as input;
    otherwise the file path is passed directly.

    unknown command line arguments are ignored; a warning is logged if there are any.

    @return: None
    """
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        description="""
        population genetics plot for multiple-scattering optimization results

        this module operates on results or database files and produces one graphics file per generation.

        database files contain the complete information for all plot types.
        data from the most recent job stored in the database is used.

        .dat results files contain all data shown in genetic plots.
        .tasks.dat files lack the generation and particle identification and should not be used.

        note that the plot type is independent of the optimization mode.
        it's possible to generate genetic plots from a particle swarm optimization and vice versa.
        """)
    parser.add_argument('results_file',
                        help="path to results file (.dat) or sqlite3 database file.")
    parser.add_argument('output_file',
                        help="base name of output file. generation and extension will be appended.")
    # the title is resolved with the same template mechanism as the default titles in this module,
    # which use the ${gen} placeholder syntax.
    parser.add_argument('-t', '--title', default=None,
                        help='graph title. may contain ${gen} as a placeholder for the generation number.')

    args, unknown_args = parser.parse_known_args()
    if unknown_args:
        # keep the tolerant behaviour but do not drop the arguments silently.
        logger.warning("ignoring unknown arguments: %s", " ".join(unknown_args))

    # pass the title only if given so that render_genetic applies its default otherwise.
    kwargs = {}
    if args.title is not None:
        kwargs['title'] = args.title

    render_func = render_genetic

    if db_util.is_sqlite3_file(args.results_file):
        # imported here because database access is needed for database input only.
        import pmsco.database.access as db_access
        db = db_access.DatabaseAccess()
        db.connect(args.results_file)
        with db.session() as session:
            render_func(args.output_file, session, **kwargs)
    else:
        render_func(args.output_file, args.results_file, **kwargs)
|
|
|
|
|
|
if __name__ == '__main__':
    # script entry point: run the command line interface,
    # then terminate explicitly with exit status 0.
    main()
    sys.exit(0)
|