pmsco-public/pmsco/graphics/rfactor.py

"""
@package pmsco.graphics.rfactor
graphics rendering module for r-factor optimization results.

this module is under development.
interface and implementation are subject to change.

@author Matthias Muntwiler, matthias.muntwiler@psi.ch

@copyright (c) 2018 by Paul Scherrer Institut @n
Licensed under the Apache License, Version 2.0 (the "License"); @n
  you may not use this file except in compliance with the License.
  You may obtain a copy of the License at
  http://www.apache.org/licenses/LICENSE-2.0
"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import logging
import math
import numpy as np
from pmsco.helpers import BraceMessage as BMsg

logger = logging.getLogger(__name__)

# matplotlib is treated as an optional dependency.
# if the import fails, both names are set to None and a warning is logged,
# so that this module can still be imported.
# code that calls Figure() afterwards fails with a TypeError because None is not callable.
try:
    from matplotlib.figure import Figure
    from matplotlib.backends.backend_agg import FigureCanvasAgg as FigureCanvas
except ImportError:
    Figure = None
    FigureCanvas = None
    logger.warning("error importing matplotlib. graphics rendering disabled.")


def _mark_summary(axes, summary, param_name):
    """
    draw the optimum and the error threshold as dotted black lines.

    @param axes: matplotlib axes object to draw into.

    @param summary: (dict) the dictionary returned by @ref evaluate_results.
        the items val, vmin, vmax, rmin and rvar are read.

    @param param_name: name of the model parameter shown on the horizontal axis.

    @return None
    """
    # vertical line at the parameter value of the best r-factor,
    # spanning the full r-factor range of the data.
    best = summary['val'][param_name]
    r_low = summary['vmin']['_rfac']
    r_high = summary['vmax']['_rfac']
    axes.plot((best, best), (r_low, r_high), ':k')

    # horizontal line at rmin + rvar,
    # spanning the full range of the parameter.
    p_low = summary['vmin'][param_name]
    p_high = summary['vmax'][param_name]
    threshold = summary['rmin'] + summary['rvar']
    axes.plot((p_low, p_high), (threshold, threshold), ':k')


def render_param_rfac(filename, data, param_name, summary=None, canvas=None):
    """
    render a scatter plot of the r-factor versus one model parameter.

    the graph is saved to a file.
    the file format is the default file type of the canvas class (PNG for the default canvas).

    this function requires the matplotlib module.
    if it is not available, the function raises an error.

    @param filename: path and name of the results file.
        the output file name is derived from it by appending
        the parameter name and the default file type of the canvas, separated by dots.

    @param data: numpy-structured array of results (one-dimensional).

        the field named by param_name is plotted on the horizontal axis,
        the field _rfac on the vertical axis.
        both fields must be present.

    @param param_name: name of the model parameter to display.
        this must correspond to a field name of the data array.

    @param summary: (dict) the dictionary returned by @ref evaluate_results.
        if given, the parameter value at the minimum r-factor and
        the level rmin + rvar are marked by dotted lines.
        if None, these values are not marked in the plot.

    @param canvas: a FigureCanvas class reference from a matplotlib backend.
        if None, the default FigureCanvasAgg is used which produces a bitmap file in PNG format.

    @return (str) path and name of the generated graphics file.

    @raise TypeError if matplotlib is not available.
    """
    canvas_class = FigureCanvas if canvas is None else canvas
    figure = Figure()
    # instantiating the canvas attaches it to the figure. the instance itself is not needed.
    canvas_class(figure)

    axes = figure.add_subplot(111)
    axes.scatter(data[param_name], data['_rfac'], c='b', marker='o', s=4.0)

    if summary is not None:
        _mark_summary(axes, summary, param_name)

    axes.grid(True)
    axes.set_xlabel(param_name)
    axes.set_ylabel('R-factor')

    extension = canvas_class.get_default_filetype()
    out_filename = "{0}.{1}.{2}".format(filename, param_name, extension)
    figure.savefig(out_filename)
    return out_filename


def evaluate_results(data, features=50.):
    """
    locate the minimum r-factor and estimate parameter values and their errors.

    the one-sigma variation of the r-factor is calculated as rmin * sqrt(2 / features).
    the error of a model parameter is half of the range covered by those results
    whose r-factor does not exceed rmin + rvar.

    @param data: numpy-structured array of results (one-dimensional).

        the field names identify the model parameters and optimization control values.
        model parameters can have any name not including a leading underscore and are evaluated as is.
        the names of the special optimization control values begin with an underscore.
        of these, at least _rfac must be provided.

    @param features: number of independent features (pieces of information) in the data.
        this quantity can be approximated as the scan range divided by the average width of a feature
        which includes an intrinsic component and the instrumental resolution.
        see Booth et al., Surf. Sci. 387 (1997), 152 for energy scans, and
        Muntwiler et al., Surf. Sci. 472 (2001), 125 for angle scans.
        the default value of 50 is a typical value.

    @return dictionary of evaluation results.

        the dictionary contains scalars and structured arrays as follows.
        the structured arrays have the same data type as the input data and contain exactly one element.

        @arg rmin: (scalar) minimum r-factor.
        @arg rvar: (scalar) one-sigma variation of r-factor.
        @arg imin: (scalar) array index where the minimum is located.
        @arg val:  (structured array) estimates of parameter values (parameter value at rmin).
        @arg sig:  (structured array) one-sigma error of estimated values.
            fields whose names begin with an underscore remain zero.
        @arg vmin: (structured array) minimum value of each field.
        @arg vmax: (structured array) maximum value of each field.
    """
    rfactors = data['_rfac']
    imin = rfactors.argmin()
    rmin = rfactors[imin]
    rvar = rmin * math.sqrt(2. / float(features))

    # four independent one-element records of the same layout as the input.
    val, sig, vmin, vmax = (np.zeros(1, dtype=data.dtype) for _ in range(4))

    # results whose r-factor lies within one sigma of the minimum.
    within_sigma = rfactors <= rmin + rvar

    for name in data.dtype.names:
        column = data[name]
        val[name] = column[imin]
        vmin[name] = column.min()
        vmax[name] = column.max()
        if name[0] == '_':
            # control values do not get an error estimate.
            continue
        accepted = column[within_sigma]
        sig[name] = (accepted.max() - accepted.min()) / 2.

    return {'rmin': rmin, 'rvar': rvar, 'imin': imin, 'val': val, 'sig': sig, 'vmin': vmin, 'vmax': vmax}


def render_results(results_file, data=None):
    """
    produce graphics files from optimization results.

    one graph of r-factor versus parameter value is rendered for each model parameter
    that has a non-zero error estimate according to @ref evaluate_results.

    the results can be passed in a file name or numpy array (see parameter section).

    the default file format is PNG.

    this function requires the matplotlib module.
    if it is not available, the function will log a warning message and return gracefully.

    @param results_file: path and name of the result file.

        result files are the ones written by swarm.SwarmPopulation.save_array, for instance.
        the file contains columns of model parameters and optimization control values.
        the first row must contain column names that identify the quantity.
        model parameters can have any name not including a leading underscore and are evaluated as is.
        the names of the special optimization control values begin with an underscore.
        of these, at least _rfac must be provided.

        if the optional data parameter is present,
        this is used only to derive the output file path by adding the extension of the graphics file format.

    @param data: numpy-structured array of results (one-dimensional).

        the field names identify the model parameters and optimization control values.
        model parameters can have any name not including a leading underscore and are evaluated as is.
        the names of the special optimization control values begin with an underscore.
        of these, at least _rfac must be provided.

        a zero-dimensional array (single record) is accepted and treated as an array of one element.

        if this argument is omitted, the data is loaded from the file referenced by the filename argument.

    @return (list of str) path names of the generated graphics files.
        empty if the data contains no records or if a rendering error occurred.
        TypeError, AttributeError and IOError raised during rendering are caught
        and add a warning in the log file.
        errors raised while loading the file are not caught.
    """

    if data is None:
        data = np.genfromtxt(results_file, names=True)

    # genfromtxt squeezes a file with a single data row to a zero-dimensional array,
    # which cannot be indexed by evaluate_results.
    # atleast_1d restores the one-dimensional shape and leaves other arrays unchanged.
    data = np.atleast_1d(data)

    if data.size == 0:
        # without any record there is no minimum to evaluate and nothing to plot.
        logger.warning(BMsg("no results to render in file {file}", file=results_file))
        return []

    summary = evaluate_results(data)

    out_files = []
    try:
        for name in data.dtype.names:
            # skip control values and parameters that do not vary within the error limit.
            if name[0] != '_' and summary['sig'][name] > 0.:
                graph_file = render_param_rfac(results_file, data, name, summary)
                out_files.append(graph_file)
    except (TypeError, AttributeError, IOError) as e:
        logger.warning(BMsg("error rendering scan file {file}: {msg}", file=results_file, msg=str(e)))

    return out_files