Files
pmsco-public/pmsco/reports/genetic.py

368 lines
16 KiB
Python
Executable File

#!/usr/bin/env python
"""
@package pmsco.reports.genetic
graphics rendering module for population genetics.
the module can be used in several different ways:
1. via the command line on a pmsco database or .dat results file.
this is the simplest but least flexible way.
2. via python functions on given population arrays or database queries.
this is the most flexible way but requires understanding of the required data formats.
3. as a listener on calculation events. (to be implemented)
this will be configurable in the run file.
@author Matthias Muntwiler, matthias.muntwiler@psi.ch
@copyright (c) 2021 by Paul Scherrer Institut @n
Licensed under the Apache License, Version 2.0 (the "License"); @n
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
"""
import argparse
import logging
import numpy as np
from pathlib import Path
import sys
if __name__ == "__main__":
pmsco_root = Path(__file__).resolve().parent.parent.parent
if str(pmsco_root) not in sys.path:
sys.path.insert(0, str(pmsco_root))
import pmsco.reports.results as rp_results
import pmsco.database.util as db_util
import pmsco.database.query as db_query
from pmsco.reports.base import ProjectReport
from pmsco.reports.population import GenerationTracker
logger = logging.getLogger(__name__)
try:
    from matplotlib.backends.backend_agg import FigureCanvasAgg as FigureCanvas
    from matplotlib.figure import Figure
    from matplotlib.ticker import MaxNLocator
    # from matplotlib.backends.backend_pdf import FigureCanvasPdf
    # from matplotlib.backends.backend_svg import FigureCanvasSVG
except ImportError:
    # matplotlib is optional.
    # the three names are bound to None so that the module can still be imported;
    # plot_genetic() tests Figure and FigureCanvas for None and returns None in that case.
    Figure = FigureCanvas = MaxNLocator = None
    logger.warning("error importing matplotlib. graphics rendering disabled.")
def plot_genetic(filename, rpos2d, param_labels, title=None, cmap=None, canvas=None):
    """
    draw one genetic chart and save it to a graphics file.

    a genetic chart is a pseudo-colour image of the population:
    the x-axis is the model parameter, the y-axis is the particle index,
    and the colour encodes the relative position of a parameter value within its range.
    comparing charts of successive generations shows how diversity evolves.

    this function contains the drawing code only.
    the input must be in its final form - no checks, conversion or processing are done here.

    the output format follows from the canvas class: the file extension is taken from
    canvas.get_default_filetype(). with the default FigureCanvasAgg this is a PNG bitmap.

    @param filename: path and name of the output file without extension.

    @param rpos2d: (two-dimensional numpy array of numeric type)
        relative positions of the particles in the model space.
        dimension 0 (y-axis) is the particle index,
        dimension 1 (x-axis) is the parameter index (in the order given by param_labels).
        the colour scale is fixed to the interval 0...1.

    @param param_labels: (sequence) parameter names used as x tick labels.

    @param title: (str) chart title. default is 'genetic chart'.

    @param cmap: (str) name of a colour map supported by matplotlib. default is 'jet'.
        other good-looking options are 'PiYG', 'RdBu', 'RdYlGn', 'coolwarm'.

    @param canvas: a FigureCanvas class reference from a matplotlib backend.
        if None, the module-level FigureCanvas (FigureCanvasAgg) is used.
        other options are, e.g., matplotlib.backends.backend_pdf.FigureCanvasPdf or
        matplotlib.backends.backend_svg.FigureCanvasSVG.

    @return (str) path and name of the generated graphics file.
        None if matplotlib is not available (Figure or canvas is None)
        or if saving the figure raised an OSError (the exception is logged).
    """
    canvas_class = FigureCanvas if canvas is None else canvas
    if canvas_class is None or Figure is None:
        # matplotlib could not be imported - nothing can be rendered.
        return None

    colour_map = 'jet' if cmap is None else cmap
    chart_title = 'genetic chart' if title is None else title

    figure = Figure()
    # instantiating the canvas class attaches a canvas of that backend to the figure.
    canvas_class(figure)
    axes = figure.add_subplot(111)

    image = axes.imshow(rpos2d, aspect='auto', cmap=colour_map, origin='lower')
    # fixed colour limits keep charts of different generations comparable.
    image.set_clim((0.0, 1.0))

    axes.set_xticks(np.arange(len(param_labels)))
    axes.set_xticklabels(param_labels, rotation=45, ha="right", rotation_mode="anchor")
    # particle indices are integers - avoid fractional tick labels.
    axes.yaxis.set_major_locator(MaxNLocator(integer=True))
    axes.set_ylabel('particle')
    axes.set_title(chart_title)

    colour_bar = axes.figure.colorbar(image, ax=axes)
    colour_bar.ax.set_ylabel("relative value", rotation=-90, va="bottom")

    extension = canvas_class.get_default_filetype()
    target = f"{filename}.{extension}"
    try:
        figure.savefig(target)
    except OSError:
        logger.exception(f"exception while saving figure {target}")
        return None
    return target
class GeneticPlot(ProjectReport, GenerationTracker):
    """
    produce two-dimensional genetic population charts

    this class collects and validates all parameters and data for generating a series of genetic charts.
    it iterates over generations and calls plot_genetic() for each.

    a genetic chart is a pseudo-colour representation of the coordinates of each individual in the model space.
    the axes are the particle number and the model parameter.
    the colour is mapped from the relative position of a parameter value within the parameter range.

    the chart should illustrate the diversity in the population.
    converged parameters will show similar colours.
    by comparing charts of different generations, the effect of the optimization algorithm can be examined.
    though the chart type is designed for the genetic algorithm, it may be useful for other algorithms as well.

    the graphics file format can be changed by providing a specific canvas. default is PNG.
    """

    def __init__(self):
        """
        set up the default file name and title templates and an empty result container.
        """
        super().__init__()
        # presumably the optimization modes for which this report is applicable - confirm in ProjectReport.
        self._modes = ['genetic', 'swarm']
        # container of the results to be plotted. filled by select_data() or assigned by the caller.
        self.result_data = rp_results.ResultData()
        # templates resolved by resolve_template() with the keys 'base' and 'gen' (see create_report).
        self.filename_format = "${base}-genetic-${gen}"
        self.title_format = "generation ${gen}"
        # name of the matplotlib colour map. None selects the default of plot_genetic().
        self.cmap = None
        # not used within this class.
        self.params = None

    def select_data(self, jobs=-1, calcs=None):
        """
        load the results of the changed generations from the database into self.result_data.

        the generations to load are determined by changed_generations().
        if a project is attached, its model space is applied to the result data.

        @param jobs: filter by job.
            the argument can be a singleton or sequence of orm.Job objects or numeric id.
            if None, results from all jobs are loaded.
            if -1 (default), results from the most recent job (by datetime field) are loaded.

        @param calcs: the calcs argument is ignored.

        @return: None
        """
        with self.get_session() as session:
            if jobs == -1:
                jobs = db_query.query_newest_job(session)

            changed_gens = self.changed_generations(session, jobs)
            self.result_data.reset_filters()
            self.result_data.generations = changed_gens
            self.result_data.levels = {'scan': -1}
            self.result_data.load_from_db(session, jobs=jobs)
            if self._project:
                self.result_data.set_model_space(self._project.model_space)

    def create_report(self):
        """
        generate the plots based on the stored attributes.

        this method loops over the generations in self.result_data,
        normalizes the parameter values of each generation to the model space range,
        and calls plot_genetic() once per generation.

        no plots are produced (and a warning is logged) if the result data contains
        fewer than 2 parameters, no generation, or fewer than 5 particles.

        @return: list of created files
        """
        # check that result data is compatible with genetic plots
        if self.result_data.params is None or len(self.result_data.params) < 2:
            logger.warning("result data must contain at least 2 parameters")
            return []
        if self.result_data.generations is None or len(self.result_data.generations) < 1:
            logger.warning("result data must specify at least 1 generation")
            return []
        if self.result_data.particles is None or len(self.result_data.particles) < 5:
            # the message now states the limit that the condition actually enforces.
            logger.warning("result data must specify at least 5 particles")
            return []

        vmin = self.result_data.model_space.min
        vmax = self.result_data.model_space.max
        # case-insensitive alphabetical order of the plotted parameters (x-axis).
        pnames = sorted(self.result_data.non_degenerate_params(), key=str.lower)

        # pass on only the options that were set explicitly so that plot_genetic() applies its own defaults.
        kwargs = {}
        if self.cmap is not None:
            kwargs['cmap'] = self.cmap
        # NOTE(review): self.canvas is not set in this class - presumably provided by ProjectReport or the caller.
        if self.canvas is not None:
            kwargs['canvas'] = self.canvas

        files = []
        fdict = {'base': self.base_filename}
        # the path template does not depend on the generation.
        filename_template = Path(self.report_dir, self.filename_format)

        for rd in self.result_data.iterate_generations():
            fdict['gen'] = int(rd.generations[0])
            filename = Path(self.resolve_template(filename_template, fdict))
            kwargs['title'] = self.resolve_template(self.title_format, fdict)

            # order the rows by particle index so that the row number corresponds to the particle order.
            sorted_values = np.sort(rd.values, order='_particle')
            values_2d = np.zeros((sorted_values.shape[0], len(pnames)))
            for index, pname in enumerate(pnames):
                # relative position within the parameter range.
                # assumes non_degenerate_params() excludes parameters with vmax == vmin - TODO confirm.
                values_2d[:, index] = (sorted_values[pname] - vmin[pname]) / (vmax[pname] - vmin[pname])

            of = plot_genetic(filename, values_2d, pnames, **kwargs)
            if of:
                files.append(of)

        return files
def render_genetic(output_file, values, model_space=None, generations=None, title=None, cmap=None,
                   canvas=None):
    """
    produce genetic charts from a given population or result set.

    a genetic chart is a pseudo-colour representation of the coordinates of each individual in the model space.
    the axes are the particle number and the model parameter.
    the colour is mapped from the relative position of a parameter value within the parameter range.
    converged parameters show similar colours, so the chart illustrates the diversity in the population.
    though the chart type is designed for the genetic algorithm, it may be useful for other algorithms as well.

    the function loads the data into a ResultData object via its load_any() method,
    configures a GeneticPlot report and returns the files created by its create_report() method.
    one chart per generation is produced.

    the input (values argument) is handed to ResultData.load_any() unchanged.
    according to the original documentation of this function, it can be one of:
    - a result (.dat) file or numpy structured array that contains
      regular parameters as well as the _particle and _gen columns. other columns are ignored.
    - a population file (file name or file object readable by numpy.genfromtxt) or numpy structured array
      in the format of the Population class, containing regular parameters and the _particle column.
    - a pmsco.optimizers.population.Population object with valid data.
    - an open pmsco database session. the most recent job results are loaded.
    NOTE(review): the exact set of accepted types and whether arrays must be wrapped in a sequence
    is defined by ResultData.load_any - confirm there.

    the graphics file format can be changed by providing a specific canvas. default is PNG.

    this function requires the matplotlib module.

    @param output_file: path and base name of the output file without extension.
        the generation index ("-${gen}") and the file extension according to the file format are appended.

    @param values: population or result data, or the source to load them from (see above).

    @param model_space: model space can be a pmsco.project.ModelSpace object,
        any object that contains the same min and max attributes as pmsco.project.ModelSpace,
        or a dictionary with two keys 'min' and 'max' that provides the corresponding ModelSpace dictionaries.
        if None, set_model_space() is not called and the boundaries held by the result data are used.

    @param generations: (int or sequence) generation index or list of indices
        that is assigned to the generations filter of the result data.
        a single int is wrapped in a tuple.
        by default (None), no filter is set, and one graph for each generation is produced.

    @param title: (str) title template of the chart.
        the default is "${gen}", i.e., the generation number.
        NOTE(review): placeholders are resolved by GeneticPlot.resolve_template;
        the defaults in this module use the ${name} syntax - confirm before using {name}.

    @param cmap: (str) name of colour map supported by matplotlib.
        default is 'jet'.
        other good-looking options are 'PiYG', 'RdBu', 'RdYlGn', 'coolwarm'.

    @param canvas: a FigureCanvas class reference from a matplotlib backend.
        if None, the default FigureCanvasAgg is used which produces a bitmap file in PNG format.
        some other options are:
        matplotlib.backends.backend_pdf.FigureCanvasPdf or
        matplotlib.backends.backend_svg.FigureCanvasSVG.

    @return (list of str) paths of the generated graphics files.
        empty if no chart was produced.
    """
    # a single generation index is turned into a one-element filter sequence.
    gen_filter = (generations,) if isinstance(generations, int) else generations

    result_data = rp_results.ResultData()
    result_data.generations = gen_filter
    result_data.levels = {'scan': -1}
    result_data.load_any(values)
    if model_space is not None:
        result_data.set_model_space(model_space)

    report = GeneticPlot()
    report.canvas = canvas
    report.cmap = cmap
    report.title_format = title if title else "${gen}"

    # split the requested output path into report directory and file name template.
    target = Path(output_file)
    report.report_dir = target.parent
    report.filename_format = target.name + "-${gen}"

    # validate without a project, then hand over the data loaded above.
    report.validate(None)
    report.result_data = result_data
    return report.create_report()
def main():
    """
    command line interface: render genetic charts from a results file or database.

    positional arguments are the input file (.dat results file or sqlite3 database)
    and the base name of the output files. the optional -t/--title argument sets the chart title.
    unrecognized command line arguments are ignored.

    if the input is an sqlite3 file, a database session is opened and passed to render_genetic(),
    otherwise the file path is passed directly.

    @return: None
    """
    # the description is kept flush left inside the literal; argparse re-flows its whitespace for display.
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        description="""
population genetics plot for multiple-scattering optimization results
this module operates on results or database files and produces one graphics file per generation.
database files contain the complete information for all plot types.
data from the most recent job stored in the database is used.
.dat results files contain all data shown in genetic plots.
.tasks.dat files lack the generation and particle identification and should not be used.
note that the plot type is independent of the optimization mode.
it's possible to generate genetic plots from a particle swarm optimization and vice versa.
""")
    parser.add_argument('results_file',
                        help="path to results file (.dat) or sqlite3 database file.")
    parser.add_argument('output_file',
                        help="base name of output file. generation and extension will be appended.")
    # NOTE(review): the help text advertises {gen} while the defaults in this file use ${gen} -
    # confirm which placeholder syntax resolve_template accepts.
    parser.add_argument('-t', '--title', default=None,
                        help='graph title. may contain {gen} as a placeholder for the generation number.')
    args, _ignored = parser.parse_known_args()

    # forward the title only if given so that render_genetic() applies its own default otherwise.
    options = {} if args.title is None else {'title': args.title}

    if not db_util.is_sqlite3_file(args.results_file):
        render_genetic(args.output_file, args.results_file, **options)
        return

    # the database layer is imported only when a database file is actually processed.
    import pmsco.database.access as db_access
    database = db_access.DatabaseAccess()
    database.connect(args.results_file)
    with database.session() as session:
        render_genetic(args.output_file, session, **options)
if __name__ == '__main__':
    # script entry point: run the command line interface, then terminate with exit status 0.
    main()
    raise SystemExit(0)