pmsco-public/pmsco/graphics/population.py

"""
@package pmsco.graphics.population
graphics rendering module for population dynamics.

the main function is render_genetic_chart().

this module is experimental.
interface and implementation are subject to change.

@author Matthias Muntwiler, matthias.muntwiler@psi.ch

@copyright (c) 2021 by Paul Scherrer Institut @n
Licensed under the Apache License, Version 2.0 (the "License"); @n
  you may not use this file except in compliance with the License.
  You may obtain a copy of the License at
  http://www.apache.org/licenses/LICENSE-2.0
"""

import logging
import numpy as np
import os
from pmsco.database import regular_params, special_params

logger = logging.getLogger(__name__)

try:
    from matplotlib.figure import Figure
    from matplotlib.backends.backend_agg import FigureCanvasAgg as FigureCanvas
    # from matplotlib.backends.backend_pdf import FigureCanvasPdf
    # from matplotlib.backends.backend_svg import FigureCanvasSVG
except ImportError:
    Figure = None
    FigureCanvas = None
    logger.warning("error importing matplotlib. graphics rendering disabled.")


def _default_range(pos):
    """
    derive default parameter boundaries from the data itself.

    only the columns selected by pmsco.database.regular_params() are considered.

    @param pos: (numpy.ndarray) 1-dimensional structured array of parameter values.
    @return: tuple (range_min, range_max).
        each item is a dictionary that maps a parameter name
        to the smallest, respectively largest, value found in the corresponding column.
    """
    lower = {}
    upper = {}
    for key in regular_params(pos.dtype.names):
        column = pos[key]
        lower[key], upper[key] = column.min(), column.max()
    return lower, upper


def _prune_constant_params(pnames, range_min, range_max):
    """
    remove constant parameters from the list and range

    @param pnames: (list)
    @param range_min: (dict)
    @param range_max: (dict)
    @return:
    """
    del_names = [name for name in pnames if range_max[name] <= range_min[name]]
    for name in del_names:
        pnames.remove(name)
        del range_min[name]
        del range_max[name]


def render_genetic_chart(output_file, input_data_or_file, model_space=None, generations=None, title=None, cmap=None,
                         canvas=None):
    """
    produce a genetic chart from a given population.

    a genetic chart is a pseudo-colour representation of the coordinates of each individual in the model space.
    the axes are the particle number and the model parameter.
    the colour is mapped from the relative position of a parameter value within the parameter range.

    the chart should illustrate the diversity in the population.
    converged parameters will show similar colours.
    by comparing charts of different generations, the effect of the optimization algorithm can be examined.
    though the chart type is designed for the genetic algorithm, it may be useful for other algorithms as well.

    the function requires input in one of the following forms:
    - a result (.dat) file or numpy structured array.
      the array must contain regular parameters, as well as the _particle and _gen columns.
      the function generates one chart per generation unless the generations argument is specified.
    - a population (.pop) file or numpy structured array.
      the array must contain regular parameters, as well as the _particle column.
    - a pmsco.optimizers.population.Population object with valid data
      (any object that provides pos, model_min, model_max and generation attributes).
      in this case, the generations argument is ignored and the generation of the object is used.

    constant parameters (maximum <= minimum) are not plotted.
    the range objects passed in by the caller (model space or population) are not modified.

    the graphics file format can be changed by providing a specific canvas. default is PNG.

    this function requires the matplotlib module.
    if it is not available, the function raises an error.

    @param output_file: path and base name of the output file without extension.
        a generation index and the file extension according to the file format are appended.
    @param input_data_or_file: a numpy structured ndarray of a population or result list from an optimization run.
        alternatively, the file path of a result file (.dat) or population file (.pop) can be given.
        file can be any object that numpy.genfromtxt() can handle.
    @param model_space: model space can be a pmsco.project.ModelSpace object,
        any object that contains the same min and max attributes as pmsco.project.ModelSpace,
        or a dictionary with two keys 'min' and 'max' that provides the corresponding ModelSpace dictionaries.
        by default, the model space boundaries are derived from the input data.
        if a model_space is specified, only the parameters listed in it are plotted.
    @param generations: (int or sequence) generation index or list of indices.
        this index is used in the output file name and for filtering input data by generation.
        if the input data does not contain the generation (_gen column), no filtering is applied.
        by default, no filtering is applied, and one graph for each generation is produced.
    @param title: (str) title of the chart.
        the title is a {}-style format string, where {base} is the output file name and {gen} is the generation.
        default: derived from file name.
    @param cmap: (str) name of colour map supported by matplotlib.
        default is 'jet'.
        other good-looking options are 'PiYG', 'RdBu', 'RdYlGn', 'coolwarm'.
    @param canvas: a FigureCanvas class reference from a matplotlib backend.
        if None, the default FigureCanvasAgg is used which produces a bitmap file in PNG format.
        some other options are:
        matplotlib.backends.backend_pdf.FigureCanvasPdf or
        matplotlib.backends.backend_svg.FigureCanvasSVG.

    @return (list of str) paths and names of the generated graphics files, one item per chart.

    @raise TypeError if matplotlib is not available.
    """

    try:
        # a Population-like object
        pos = np.copy(input_data_or_file.pos)
        range_min = input_data_or_file.model_min
        range_max = input_data_or_file.model_max
        generations = [input_data_or_file.generation]
    except AttributeError:
        if isinstance(input_data_or_file, np.ndarray) and input_data_or_file.dtype.names is not None:
            # a structured array is used directly. do not rely on genfromtxt rejecting it.
            pos = np.copy(input_data_or_file)
        else:
            try:
                pos = np.atleast_1d(np.genfromtxt(input_data_or_file, names=True))
            except TypeError:
                pos = np.copy(input_data_or_file)
        range_min, range_max = _default_range(pos)
    pnames = regular_params(pos.dtype.names)

    if model_space is not None:
        try:
            # a ModelSpace-like object
            range_min = model_space.min
            range_max = model_space.max
        except AttributeError:
            # a dictionary-like object
            range_min = model_space['min']
            range_max = model_space['max']
        try:
            pnames = range_min.keys()
        except AttributeError:
            pnames = range_min.dtype.names

    pnames = list(pnames)
    # work on private dictionaries:
    # _prune_constant_params deletes entries, which must not affect the objects owned by the caller
    # and which is not possible at all if the ranges are numpy structured arrays.
    range_min = {name: range_min[name] for name in pnames}
    range_max = {name: range_max[name] for name in pnames}
    _prune_constant_params(pnames, range_min, range_max)

    has_gen = '_gen' in (pos.dtype.names or ())
    if generations is None:
        if has_gen:
            generations = np.unique(pos['_gen'])
    else:
        # accept a scalar generation index as well as a sequence
        generations = np.atleast_1d(generations)

    files = []
    base = os.path.basename(output_file)
    if generations is not None and len(generations):
        if title is None:
            title = "{base} gen {gen}"

        for generation in generations:
            if has_gen:
                gpos = pos[pos['_gen'] == generation]
            else:
                # no generation column: the index is used for naming only
                gpos = pos
            gtitle = title.format(base=base, gen=int(generation))
            out_filename = "{base}-{gen}".format(base=os.fspath(output_file), gen=int(generation))
            out_filename = _render_genetic_chart_2(out_filename, gpos, pnames, range_min, range_max,
                                                   gtitle, cmap, canvas)
            files.append(out_filename)
    else:
        if title is None:
            title = "{base}"
        gtitle = title.format(base=base, gen="")
        out_filename = "{base}".format(base=os.fspath(output_file))
        out_filename = _render_genetic_chart_2(out_filename, pos, pnames, range_min, range_max, gtitle, cmap, canvas)
        files.append(out_filename)

    return files


def _render_genetic_chart_2(out_filename, pos, pnames, range_min, range_max, title, cmap, canvas):
    """
    internal part of render_genetic_chart()

    the function orders the particles by the _particle column,
    maps each parameter value to its relative position between range_min and range_max,
    and hands the resulting two-dimensional array over to plot_genetic_chart().

    @param out_filename: path and name of the output file without extension.
    @param pos: structured array of particle positions. must contain the _particle column
        and one column for each item of pnames.
    @param pnames: sequence of the names of the parameters to plot (chart columns, in this order).
    @param range_min: mapping of parameter name to lower boundary.
    @param range_max: mapping of parameter name to upper boundary.
        must differ from the lower boundary because the difference is used as divisor.
    @param title: chart title, passed on to plot_genetic_chart().
    @param cmap: colour map, passed on to plot_genetic_chart().
    @param canvas: canvas class, passed on to plot_genetic_chart().
    @return: file name returned by plot_genetic_chart().
    """
    ordered = np.sort(pos, order='_particle')
    n_particles = ordered.shape[0]
    relative = np.zeros((n_particles, len(pnames)))
    for column, key in enumerate(pnames):
        lower = range_min[key]
        span = range_max[key] - lower
        relative[:, column] = (ordered[key] - lower) / span
    return plot_genetic_chart(out_filename, relative, pnames, title=title, cmap=cmap, canvas=canvas)


def plot_genetic_chart(filename, rpos2d, param_labels, title=None, cmap=None, canvas=None):
    """
    draw a genetic chart of prepared data and save it to a file.

    a genetic chart is a pseudo-colour image of the coordinates of each individual in the model space.
    the x-axis is the model parameter, the y-axis is the particle number.
    the colour encodes the relative position of a parameter value within its range (colour limits 0 to 1).
    the chart is meant to show the diversity in a population and,
    if charts of different generations are compared, the changes between them.

    this is the pure drawing part of render_genetic_chart().
    the input must be in its final form. no checks, conversion or processing are done here.

    the graphics file format is determined by the canvas. default is PNG.

    this function requires the matplotlib module.
    if it is not available, the function raises an error.

    @param filename: path and name of the output file without extension.
        the default file type of the canvas is appended as extension.
    @param rpos2d: (two-dimensional numpy array of numeric type)
        relative positions of the particles in the model space.
        dimension 0 (y-axis) is the particle index,
        dimension 1 (x-axis) is the parameter index (in the order given by param_labels).
        all values must be between 0 and 1.
    @param param_labels: (sequence) list or tuple of parameter names, used as x tick labels.
    @param title: (str) string to be printed as chart title. default is 'genetic chart'.
    @param cmap: (str) name of colour map supported by matplotlib.
        default is 'jet'.
        other good-looking options are 'PiYG', 'RdBu', 'RdYlGn', 'coolwarm'.
    @param canvas: a FigureCanvas class reference from a matplotlib backend.
        if None, the default FigureCanvasAgg is used which produces a bitmap file in PNG format.
        some other options are:
        matplotlib.backends.backend_pdf.FigureCanvasPdf or
        matplotlib.backends.backend_svg.FigureCanvasSVG.

    @return (str) path and name of the saved graphics file including extension.

    @raise TypeError if matplotlib is not available.
    """
    canvas = FigureCanvas if canvas is None else canvas
    cmap = 'jet' if cmap is None else cmap
    title = 'genetic chart' if title is None else title

    figure = Figure()
    # instantiating the canvas attaches it to the figure; the instance itself is not needed.
    canvas(figure)
    axes = figure.add_subplot(111)

    image = axes.imshow(rpos2d, aspect='auto', cmap=cmap, origin='lower')
    image.set_clim((0.0, 1.0))

    tick_positions = np.arange(len(param_labels))
    axes.set_xticks(tick_positions)
    axes.set_xticklabels(param_labels, rotation=45, ha="right", rotation_mode="anchor")
    axes.set_ylabel('particle')
    axes.set_title(title)

    colorbar = figure.colorbar(image, ax=axes)
    colorbar.ax.set_ylabel("relative value", rotation=-90, va="bottom")

    extension = canvas.get_default_filetype()
    target = "{base}.{ext}".format(base=filename, ext=extension)
    figure.savefig(target)
    return target


def render_swarm(output_file, input_data, model_space=None, title=None, cmap=None, canvas=None):
    """
    render a two-dimensional particle swarm population.

    this function generates a schematic rendering of a particle swarm in two dimensions.
    particles are represented by their position and velocity, indicated by an arrow.
    the model space is projected on the first two variable (non-constant) parameters.
    in the background, a scatter plot of results (dots with pseudocolor representing the R-factor) can be plotted.
    the chart type is designed for the particle swarm optimization algorithm.

    the function requires input in one of the following forms:
    - position (.pos), velocity (.vel) and result (.dat) files or the respective numpy structured arrays.
      the arrays must contain regular parameters, as well as the `_particle` column.
      the result file must also contain an `_rfac` column.
    - a pmsco.optimizers.population.Population object with valid data
      (any object that provides pos, vel, results, model_min and model_max attributes).

    the range objects passed in by the caller (model space or population) are not modified.

    the graphics file format can be changed by providing a specific canvas. default is PNG.

    this function requires the matplotlib module.
    if it is not available, the function raises an error.

    @param output_file: path and base name of the output file without extension.
        the file extension according to the file format is appended by plot_swarm().
    @param input_data: a pmsco.optimizers.population.Population object with valid data,
        or a sequence of position, velocity and result arrays (in this order).
        the arrays must be structured ndarrays corresponding to the respective Population members.
        alternatively, the arrays can be referenced as file paths
        in any format that numpy.genfromtxt() can handle.
        each of the three items is loaded independently.
    @param model_space: model space can be a pmsco.project.ModelSpace object,
        any object that contains the same min and max attributes as pmsco.project.ModelSpace,
        or a dictionary with two keys 'min' and 'max' that provides the corresponding ModelSpace dictionaries.
        by default, the model space boundaries are derived from the result array.
        if a model_space is specified, only the parameters listed in it are plotted.
    @param title: (str) title of the chart.
        the string is passed to plot_swarm() unchanged.
    @param cmap: (str) name of colour map supported by matplotlib.
        default is 'plasma'.
        other good-looking options are 'viridis', 'plasma', 'inferno', 'magma', 'cividis'.
    @param canvas: a FigureCanvas class reference from a matplotlib backend.
        if None, the default FigureCanvasAgg is used which produces a bitmap file in PNG format.
        some other options are:
        matplotlib.backends.backend_pdf.FigureCanvasPdf or
        matplotlib.backends.backend_svg.FigureCanvasSVG.

    @return (list of str) paths and names of the generated graphics files.
        the list is empty if fewer than two variable parameters are available.

    @raise TypeError if matplotlib is not available.
    """
    try:
        # a Population-like object
        range_min = input_data.model_min
        range_max = input_data.model_max
        pos = np.copy(input_data.pos)
        vel = np.copy(input_data.vel)
        rfac = np.copy(input_data.results)
    except AttributeError:
        arrays = []
        for source in (input_data[0], input_data[1], input_data[2]):
            if isinstance(source, np.ndarray) and source.dtype.names is not None:
                # a structured array is used directly. do not rely on genfromtxt rejecting it.
                arrays.append(np.copy(source))
            else:
                try:
                    arrays.append(np.atleast_1d(np.genfromtxt(source, names=True)))
                except TypeError:
                    arrays.append(np.copy(source))
        pos, vel, rfac = arrays
        range_min, range_max = _default_range(rfac)
    pnames = regular_params(pos.dtype.names)

    if model_space is not None:
        try:
            # a ModelSpace-like object
            range_min = model_space.min
            range_max = model_space.max
        except AttributeError:
            # a dictionary-like object
            range_min = model_space['min']
            range_max = model_space['max']
        try:
            pnames = range_min.keys()
        except AttributeError:
            pnames = range_min.dtype.names

    pnames = list(pnames)
    # work on private dictionaries:
    # _prune_constant_params deletes entries, which must not affect the objects owned by the caller
    # and which is not possible at all if the ranges are numpy structured arrays.
    range_min = {name: range_min[name] for name in pnames}
    range_max = {name: range_max[name] for name in pnames}
    _prune_constant_params(pnames, range_min, range_max)

    # the chart is two-dimensional: project on the first two variable parameters
    pnames = pnames[0:2]
    files = []
    if len(pnames) == 2:
        params = {pnames[0]: [range_min[pnames[0]], range_max[pnames[0]]],
                  pnames[1]: [range_min[pnames[1]], range_max[pnames[1]]]}
        out_filename = plot_swarm(output_file, pos, vel, rfac, params, title=title, cmap=cmap, canvas=canvas)
        files.append(out_filename)
    else:
        logger.warning("model space must be two-dimensional and non-degenerate.")

    return files


def plot_swarm(filename, pos, vel, rfac, params, title=None, cmap=None, canvas=None):
    """
    plot a two-dimensional particle swarm population.

    this is a sub-function of render_swarm() containing just the plotting commands.
    particles are drawn as markers at their position with an arrow that indicates their velocity.
    if results are given, they are drawn as a scatter plot in the background,
    where the colour represents the R-factor (colour limits 0 to 1).

    the graphics file format can be changed by providing a specific canvas. default is PNG.

    this function requires the matplotlib module.
    if it is not available, the function raises an error.

    @param filename: path and base name of the output file without extension.
        the default file type of the canvas is appended as extension.
    @param pos: structured ndarray containing the positions of the particles.
    @param vel: structured ndarray containing the velocities of the particles.
    @param rfac: structured ndarray containing positions and R-factor values (`_rfac` column).
        this array is independent of pos and vel.
        it can also be set to None if results should be suppressed.
        the results are skipped as well if the array lacks a required column.
    @param params: dictionary of two parameters to be plotted.
        the keys correspond to columns of the pos, vel and rfac arrays.
        the first key is plotted on the x-axis, the second key on the y-axis.
        the values are lists [minimum, maximum] that define the axis range.
    @param title: (str) title of the chart. the string is used as is.
        default is 'swarm map'.
    @param cmap: (str) name of colour map supported by matplotlib.
        default is 'plasma'.
        other good-looking options are 'viridis', 'plasma', 'inferno', 'magma', 'cividis'.
    @param canvas: a FigureCanvas class reference from a matplotlib backend.
        if None, the default FigureCanvasAgg is used which produces a bitmap file in PNG format.
        some other options are:
        matplotlib.backends.backend_pdf.FigureCanvasPdf or
        matplotlib.backends.backend_svg.FigureCanvasSVG.

    @return (str) path and name of the generated graphics file including extension.

    @raise TypeError if matplotlib is not available.
    """
    if canvas is None:
        canvas = FigureCanvas
    if cmap is None:
        cmap = 'plasma'
    if title is None:
        title = 'swarm map'

    pnames = list(params.keys())
    fig = Figure()
    # instantiating the canvas attaches it to the figure; the instance itself is not needed.
    canvas(fig)
    ax = fig.add_subplot(111)

    if rfac is not None:
        try:
            # columns are addressed by parameter name. params is keyed by name, not by position.
            s = ax.scatter(rfac[pnames[0]], rfac[pnames[1]], s=5, c=rfac['_rfac'], cmap=cmap, vmin=0, vmax=1)
        except ValueError:
            # a structured array raises ValueError if a column (e.g. _rfac) is missing.
            # the results are an optional background layer, continue without them.
            logger.debug("results not plotted: required column missing in results array.")
        else:
            cb = ax.figure.colorbar(s, ax=ax)
            cb.ax.set_ylabel("R-factor", rotation=-90, va="bottom")

    ax.plot(pos[pnames[0]], pos[pnames[1]], 'co')
    ax.quiver(pos[pnames[0]], pos[pnames[1]], vel[pnames[0]], vel[pnames[1]], color='c')
    ax.set_xlim(params[pnames[0]])
    ax.set_ylim(params[pnames[1]])
    ax.set_xlabel(pnames[0])
    ax.set_ylabel(pnames[1])
    ax.set_title(title)

    out_filename = "{base}.{ext}".format(base=filename, ext=canvas.get_default_filetype())
    fig.savefig(out_filename)
    return out_filename