#!/usr/bin/env python
"""
@package pmsco.reports.genetic
graphics rendering module for population genetics.

the module can be used in several different ways:

1. via the command line on a pmsco database or .dat results file.
   this is the most simple but least flexible way.
2. via python functions on given population arrays or database queries.
   this is the most flexible way but requires understanding of the required data formats.
3. as a listener on calculation events. (to be implemented)
   this will be configurable in the run file.

@author Matthias Muntwiler, matthias.muntwiler@psi.ch

@copyright (c) 2021 by Paul Scherrer Institut @n
Licensed under the Apache License, Version 2.0 (the "License"); @n
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
    http://www.apache.org/licenses/LICENSE-2.0
"""

import argparse
import logging
import numpy as np
from pathlib import Path
import sys

# when run as a script, make the pmsco package importable before the pmsco imports below.
if __name__ == "__main__":
    pmsco_root = Path(__file__).resolve().parent.parent.parent
    if str(pmsco_root) not in sys.path:
        sys.path.insert(0, str(pmsco_root))

import pmsco.reports.results as rp_results
import pmsco.database.util as db_util
import pmsco.database.query as db_query
from pmsco.reports.base import ProjectReport
from pmsco.reports.population import GenerationTracker

logger = logging.getLogger(__name__)

# matplotlib is optional. if it is missing, the names are set to None
# and plot_genetic() returns None instead of producing a file.
try:
    from matplotlib.figure import Figure
    from matplotlib.ticker import MaxNLocator
    from matplotlib.backends.backend_agg import FigureCanvasAgg as FigureCanvas
    # from matplotlib.backends.backend_pdf import FigureCanvasPdf
    # from matplotlib.backends.backend_svg import FigureCanvasSVG
except ImportError:
    Figure = None
    FigureCanvas = None
    MaxNLocator = None
    logger.warning("error importing matplotlib. graphics rendering disabled.")


def plot_genetic(filename, rpos2d, param_labels, title=None, cmap=None, canvas=None):
    """
    produce a genetic chart from the given data.

    a genetic chart is a pseudo-colour representation of the coordinates of each individual in the model space.
    the chart should highlight the amount of diversity in the population
    and - by comparing charts of different generations - the changes due to mutation.
    the axes are the model parameter (x) and particle number (y).
    the colour is mapped from the relative position of a parameter value within the parameter range.

    in contrast to render_genetic() this function contains only the drawing code.
    it requires input in the final form and does not do any checks, conversion or processing.

    the graphics file format can be changed by providing a specific canvas. default is PNG.

    this function requires the matplotlib module.
    if it is not available (and no usable canvas is given), no file is produced and the function returns None.

    @param filename: path and name of the output file without extension.
        the extension is appended according to the default file type of the canvas.

    @param rpos2d: (two-dimensional numpy array of numeric type)
        relative positions of the particles in the model space.
        dimension 0 (y-axis) is the particle index,
        dimension 1 (x-axis) is the parameter index (in the order given by param_labels).
        all values must be between 0 and 1.

    @param param_labels: (sequence) list or tuple of parameter names.

    @param title: (str) string to be printed as chart title. default is 'genetic chart'.

    @param cmap: (str) name of colour map supported by matplotlib.
        default is 'jet'.
        other good-looking options are 'PiYG', 'RdBu', 'RdYlGn', 'coolwarm'.

    @param canvas: a FigureCanvas class reference from a matplotlib backend.
        if None, the default FigureCanvasAgg is used which produces a bitmap file in PNG format.
        some other options are:
        matplotlib.backends.backend_pdf.FigureCanvasPdf or
        matplotlib.backends.backend_svg.FigureCanvasSVG.

    @return (str) path and name of the generated graphics file.
        None if no file was generated,
        either because matplotlib is not available or because saving the figure raised an OSError.
    """
    if canvas is None:
        canvas = FigureCanvas
    if canvas is None or Figure is None:
        # matplotlib could not be imported - nothing to draw with.
        return None

    if cmap is None:
        cmap = 'jet'
    if title is None:
        title = 'genetic chart'

    fig = Figure()
    # instantiating the canvas attaches it to the figure; the instance itself is not needed.
    canvas(fig)
    ax = fig.add_subplot(111)
    im = ax.imshow(rpos2d, aspect='auto', cmap=cmap, origin='lower')
    # fixed colour limits so that charts of different generations are comparable.
    im.set_clim((0.0, 1.0))
    ax.set_xticks(np.arange(len(param_labels)))
    ax.set_xticklabels(param_labels, rotation=45, ha="right", rotation_mode="anchor")
    ax.yaxis.set_major_locator(MaxNLocator(integer=True))
    ax.set_ylabel('particle')
    ax.set_title(title)
    cb = ax.figure.colorbar(im, ax=ax)
    cb.ax.set_ylabel("relative value", rotation=-90, va="bottom")

    out_filename = "{base}.{ext}".format(base=filename, ext=canvas.get_default_filetype())
    try:
        fig.savefig(out_filename)
    except OSError:
        logger.exception("exception while saving figure %s", out_filename)
        out_filename = None

    return out_filename


class GeneticPlot(ProjectReport, GenerationTracker):
    """
    produce two-dimensional genetic population charts

    this class collects and validates all parameters and data for generating a series of genetic charts.
    it iterates over generations and calls plot_genetic() for each.

    a genetic chart is a pseudo-colour representation of the coordinates of each individual in the model space.
    the axes are the particle number and the model parameter.
    the colour is mapped from the relative position of a parameter value within the parameter range.

    the chart should illustrate the diversity in the population.
    converged parameters will show similar colours.
    by comparing charts of different generations, the effect of the optimization algorithm can be examined.
    though the chart type is designed for the genetic algorithm, it may be useful for other algorithms as well.

    the graphics file format can be changed by providing a specific canvas. default is PNG.
    """

    def __init__(self):
        super().__init__()
        self._modes = ['genetic', 'swarm']
        # container of the results to be plotted
        self.result_data = rp_results.ResultData()
        # templates resolved per generation in create_report()
        self.filename_format = "${base}-genetic-${gen}"
        self.title_format = "generation ${gen}"
        # colour map name passed to plot_genetic(). None selects the default of plot_genetic().
        self.cmap = None
        self.params = None

    def select_data(self, jobs=-1, calcs=None):
        """
        query data from the database

        the method loads the results of the generations that are reported as changed
        by changed_generations() into self.result_data.
        if a project is set, its model space is applied to the result data.

        @param jobs: filter by job.
            the argument can be a singleton or sequence of orm.Job objects or numeric id.
            if None, results from all jobs are loaded.
            if -1 (default), results from the most recent job (by datetime field) are loaded.

        @param calcs: the calcs argument is ignored.

        @return: None
        """
        with self.get_session() as session:
            if jobs == -1:
                jobs = db_query.query_newest_job(session)

            changed_gens = self.changed_generations(session, jobs)
            self.result_data.reset_filters()
            self.result_data.generations = changed_gens
            self.result_data.levels = {'scan': -1}
            self.result_data.load_from_db(session, jobs=jobs)
            if self._project:
                self.result_data.set_model_space(self._project.model_space)

    def create_report(self):
        """
        generate the plots based on the stored attributes.

        this method loops over the generations of the result data,
        normalizes the parameter values of each generation to the range of the model space
        and calls plot_genetic() for each generation.

        the result data must contain at least 2 parameters, 1 generation and 1 particle.
        otherwise, a warning is logged and no plots are created.

        @return: list of created files.
            generations for which plot_genetic() does not produce a file are omitted.
        """
        # check that result data is compatible with genetic plots
        if self.result_data.params is None or len(self.result_data.params) < 2:
            logger.warning("result data must contain at least 2 parameters")
            return []
        if self.result_data.generations is None or len(self.result_data.generations) < 1:
            logger.warning("result data must specify at least 1 generation")
            return []
        # the threshold must agree with the message (as in the two checks above):
        # one particle is sufficient to draw a chart.
        if self.result_data.particles is None or len(self.result_data.particles) < 1:
            logger.warning("result data must specify at least 1 particle")
            return []

        vmin = self.result_data.model_space.min
        vmax = self.result_data.model_space.max
        # NOTE(review): presumably non_degenerate_params() excludes parameters with vmax == vmin,
        # which keeps the normalization below free of division by zero - confirm.
        pnames = sorted(self.result_data.non_degenerate_params(), key=str.lower)

        # pass only explicitly set options so that plot_genetic() can apply its own defaults
        kwargs = {}
        if self.cmap is not None:
            kwargs['cmap'] = self.cmap
        if self.canvas is not None:
            kwargs['canvas'] = self.canvas

        files = []
        fdict = {'base': self.base_filename}
        for rd in self.result_data.iterate_generations():
            fdict['gen'] = int(rd.generations[0])
            filename = Path(self.report_dir, self.filename_format)
            filename = Path(self.resolve_template(filename, fdict))
            kwargs['title'] = self.resolve_template(self.title_format, fdict)

            # rows ordered by particle number, columns ordered like pnames,
            # values scaled to the relative position within the model space.
            sorted_values = np.sort(rd.values, order='_particle')
            values_2d = np.zeros((sorted_values.shape[0], len(pnames)))
            for index, pname in enumerate(pnames):
                values_2d[:, index] = (sorted_values[pname] - vmin[pname]) / (vmax[pname] - vmin[pname])

            of = plot_genetic(filename, values_2d, pnames, **kwargs)
            if of:
                files.append(of)

        return files


def render_genetic(output_file, values, model_space=None, generations=None, title=None, cmap=None,
                   canvas=None):
    """
    produce a genetic chart from a given population.

    a genetic chart is a pseudo-colour representation of the coordinates of each individual in the model space.
    the axes are the particle number and the model parameter.
    the colour is mapped from the relative position of a parameter value within the parameter range.

    the chart should illustrate the diversity in the population.
    converged parameters will show similar colours.
    by comparing charts of different generations, the effect of the optimization algorithm can be examined.
    though the chart type is designed for the genetic algorithm, it may be useful for other algorithms as well.

    the function requires input in one of the following forms:
    - a result (.dat) file or numpy structured array.
      the array must contain regular parameters, as well as the _particle and _gen columns.
      other columns are ignored.
      the function generates one chart per generation unless the generation argument is specified.
    - a file (file name or file object) or numpy structured array.
      the array must be wrapped in a sequence (tuple or list) for compatibility with other functions.
      the array must essentially be in the same format as the corresponding member of the Population class.
      the array must contain regular parameters, as well as the _particle columns.
      files are loaded by numpy.genfromtxt.
    - a pmsco.optimizers.population.Population object with valid data.
      the generation is taken from the respective attribute and overrides the function argument.
    - an open pmsco database session. the most recent job results are loaded.

    the graphics file format can be changed by providing a specific canvas. default is PNG.

    this function requires the matplotlib module.
    if it is not available, plot_genetic() does not produce any files
    and the returned list is empty.

    @param output_file: path and base name of the output file without extension.
        a generation index and the file extension according to the file format are appended.

    @param values: a numpy structured ndarray of a population or result list from an optimization run.
        alternatively, the file path of a result file (.dat) or population file (.pop) can be given.
        file can be any object that numpy.genfromtxt() can handle.
        array or file must be wrapped in a sequence.

    @param model_space: model space can be a pmsco.project.ModelSpace object,
        any object that contains the same min and max attributes as pmsco.project.ModelSpace,
        or a dictionary with two keys 'min' and 'max' that provides the corresponding ModelSpace dictionaries.
        by default, the model space boundaries are derived from the input data.
        if a model_space is specified, only the parameters listed in it are plotted.

    @param generations: (int or sequence) generation index or list of indices.
        a single index (python int or numpy integer) is wrapped in a tuple.
        this index is used in the output file name and for filtering input data by generation.
        if the input data does not contain the generation, no filtering is applied.
        by default, no filtering is applied, and one graph for each generation is produced.

    @param title: (str) title of the chart.
        the title is used as the title template of the report.
        the built-in defaults use ${gen} as placeholder for the generation.
        NOTE(review): earlier documentation described {}-style placeholders ({base}, {gen}) -
        confirm against ProjectReport.resolve_template.
        default: "${gen}".

    @param cmap: (str) name of colour map supported by matplotlib.
        default is 'jet'.
        other good-looking options are 'PiYG', 'RdBu', 'RdYlGn', 'coolwarm'.

    @param canvas: a FigureCanvas class reference from a matplotlib backend.
        if None, the default FigureCanvasAgg is used which produces a bitmap file in PNG format.
        some other options are:
        matplotlib.backends.backend_pdf.FigureCanvasPdf or
        matplotlib.backends.backend_svg.FigureCanvasSVG.

    @return (list of str) paths of the generated graphics files.
        empty if an error occurred.
    """
    data = rp_results.ResultData()
    # a single generation index (including numpy integer scalars) becomes a one-element sequence
    if isinstance(generations, (int, np.integer)):
        generations = (generations,)
    data.generations = generations
    data.levels = {'scan': -1}
    data.load_any(values)
    if model_space is not None:
        data.set_model_space(model_space)

    plot = GeneticPlot()
    plot.canvas = canvas
    plot.cmap = cmap
    plot.title_format = title if title else "${gen}"
    out_path = Path(output_file)
    plot.report_dir = out_path.parent
    plot.filename_format = out_path.name + "-${gen}"
    plot.validate(None)
    # assign the data after validate() so that the loaded data is the one that gets plotted
    plot.result_data = data

    return plot.create_report()


def main():
    """
    command line entry point.

    parse the command line, detect whether the input is an sqlite3 database or a results file,
    and call render_genetic() on it.
    unknown command line arguments are ignored.
    """
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        description="""
        population genetics plot for multiple-scattering optimization results

        this module operates on results or database files and produces one graphics file per generation.

        database files contain the complete information for all plot types.
        data from the most recent job stored in the database is used.

        .dat results files contain all data shown in genetic plots.
        .tasks.dat files lack the generation and particle identification and should not be used.

        note that the plot type is independent of the optimization mode.
        it's possible to generate genetic plots from a particle swarm optimization and vice versa.
        """)
    parser.add_argument('results_file',
                        help="path to results file (.dat) or sqlite3 database file.")
    parser.add_argument('output_file',
                        help="base name of output file. generation and extension will be appended.")
    # NOTE(review): the default templates in this module use ${gen} rather than {gen} -
    # confirm which placeholder syntax resolve_template accepts before changing the help text.
    parser.add_argument('-t', '--title', default=None,
                        help='graph title. may contain {gen} as a placeholder for the generation number.')
    args, _unknown_args = parser.parse_known_args()

    kwargs = {}
    if args.title is not None:
        kwargs['title'] = args.title

    render_func = render_genetic

    if db_util.is_sqlite3_file(args.results_file):
        # imported here because database access is needed for database input only
        import pmsco.database.access as db_access
        db = db_access.DatabaseAccess()
        db.connect(args.results_file)
        with db.session() as session:
            render_func(args.output_file, session, **kwargs)
    else:
        render_func(args.output_file, args.results_file, **kwargs)


if __name__ == '__main__':
    main()
    sys.exit(0)