"""
gradient optimization module for MSC calculations

the module starts multiple MSC calculations and optimizes the model parameters
with a gradient search.

the optimization task is distributed over multiple processes using MPI.
the optimization must be started with N+1 processes in the MPI environment,
where N equals the number of fit parameters.

IMPLEMENTATION IN PROGRESS - DEBUGGING

Requires: scipy, numpy

Author: Matthias Muntwiler

Copyright (c) 2015 by Paul Scherrer Institut

Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
  You may obtain a copy of the License at
  http://www.apache.org/licenses/LICENSE-2.0
"""

import numpy as np
import scipy.optimize as so
import data as md
from mpi4py import MPI

# messages sent from master to slaves

# master sends new assignment
# the message is a dictionary of model parameters
TAG_NEW_TASK = 1
# master calls end of calculation
# the message is empty
TAG_FINISH = 2
# master sends current population
# currently not used
TAG_POPULATION = 2

# messages sent from slaves to master
# slave reports new result
# the message is a dictionary of model parameters and results
TAG_NEW_RESULT = 1
# slave confirms end of calculation
# currently not used
TAG_FINISHED = 2

class MscProcess(object):
    """
    Code shared by MscoMaster and MscoSlave
    """
    def __init__(self, comm):
        self.comm = comm

    def setup(self, project):
        self.project = project
        self.running = False
        self.finishing = False
        self.iteration = 0

    def run(self):
        pass

    def cleanup(self):
        pass

    def calc(self, pars):
        """
        Executes a single MSC calculation.

        pars: A dictionary of parameters expected by the cluster and parameters functions.

        returns: pars with three additional values:
            rank:  rank of the calculation process
            index: iteration index of the calculation process
            rfac:  resulting R-factor

            all other calculation results are discarded.
        """
        rev = "rank %u, iteration %u" % (self.comm.rank, self.iteration)

        # create parameter and cluster structures
        clu = self.project.create_cluster(pars)
        par = self.project.create_params(pars)

        # generate file names
        base_filename = "%s_%u_%u" % (self.project.output_file, self.comm.rank, self.iteration)

        # call the msc program
        result_etpi = self.project.run_calc(par, clu, self.project.scan_file, base_filename, delete_files=True)

        # calculate modulation function and R-factor
        result_etpi = md.calc_modfunc_lowess(result_etpi)
        result_r = md.rfactor(self.project.scan_modf, result_etpi)

        pars['rank'] = self.comm.rank
        pars['iter'] = self.iteration
        pars['rfac'] = result_r

        return pars

class MscMaster(MscProcess):
    def __init__(self, comm):
        super(MscMaster, self).__init__(comm)
        self.slaves = self.comm.Get_size() - 1
        self.running_slaves = 0

    def setup(self, project):
        super(MscMaster, self).setup(project)
        self.dom = project.create_domain()
        self.running_slaves = self.slaves

        self._outfile = open(self.project.output_file + ".dat", "w")
        self._outfile.write("#")
        self._outfile_keys = self.dom.start.keys()
        self._outfile_keys.append('rfac')
        for name in self._outfile_keys:
            self._outfile.write(" " + name)
        self._outfile.write("\n")

    def run(self):
        """
        starts the minimization
        """
        # pack initial guess, bounds, constant parameters
        nparams = len(self.dom.start)
        fit_params = np.zeros((nparams))
        params_index = {}
        const_params = self.dom.max.copy()
        bounds = []
        n_fit_params = 0
        for key in self.dom.start:
            if self.dom.max[key] > self.dom.min[key]:
                fit_params[n_fit_params] = self.dom.start[key]
                params_index[key] = n_fit_params
                n_fit_params += 1
                bounds.append((self.dom.min[key], self.dom.max[key]))
        fit_params.resize((n_fit_params))

        fit_result = so.minimize(self._minfunc, fit_params,
            args=(params_index, const_params),
            method='L-BFGS-B', jac=True, bounds=bounds)

        msc_result = const_params.copy()
        for key, index in params_index.items():
            msc_result[key] = fit_result.x[index]
        msc_result['rfac'] = fit_result.fun

        self._outfile.write("# result of gradient optimization\n")
        self._outfile.write("# success = {0}, iterations = {1}, calculations = {2}\n".format(fit_result.success, fit_result.nit, fit_result.nfev))
        self._outfile.write("# message: {0}\n".format(fit_result.message))
        for name in self._outfile_keys:
            self._outfile.write(" " + str(msc_result[name]))
        self._outfile.write("\n")

    def _minfunc(self, fit_params, params_index, const_params):
        """
        function to be minimized

        fit_params (numpy.ndarray): current fit position
        master (MscoMaster): reference to the master process
        params_index (dict): dictionary of fit parameters
            and their index in fit_params.
            key=MSC parameter name, value=index to fit_params.
        const_params (dict): dictionary of MSC parameters
            holding (at least) the constant parameter values.
            a copy of this instance, updated with the current fit position,
            is passed to MSC.
        """

        # unpack parameters
        msc_params = const_params.copy()
        for key, index in params_index.items():
            msc_params[key] = fit_params[index]

        # run MSC calculations
        rfac, jac_dict = self.run_msc_calcs(msc_params, params_index)

        # pack jacobian
        jac_arr = np.zeros_like(fit_params)
        for key, index in params_index.items():
            jac_arr[index] = jac_dict[key]

        return rfac, jac_arr

    def run_msc_calcs(self, params, params_index):
        """
        params: dictionary of actual parameters
        params_index: dictionary of fit parameter indices.
            only the keys are used here
            to decide for which parameters the derivative is calculated.

        returns:
        (float) R-factor at the params location
        (dict) approximate gradient at the params location
        """
        # distribute tasks for gradient
        slave_rank = 1
        for key in params_index:
            params2 = params.copy()
            params2[key] += self.dom.step[key]
            params2['key'] = key
            self.comm.send(params2, dest=slave_rank, tag=TAG_NEW_TASK)
            slave_rank += 1

        # run calculation for actual position
        result0 = self.calc(params)
        for name in self._outfile_keys:
            self._outfile.write(" " + str(result0[name]))
        self._outfile.write("\n")

        # gather results
        s = MPI.Status()
        jacobian = params.copy()
        for slave in range(1, slave_rank):
            result1 = self.comm.recv(source=MPI.ANY_SOURCE, tag=MPI.ANY_TAG, status=s)
            if s.tag == TAG_NEW_RESULT:
                key = result1['key']
                jacobian[key] = (result1['rfac'] - result0['rfac']) / (result1[key] - result0[key])
                for name in self._outfile_keys:
                    self._outfile.write(" " + str(result1[name]))
                self._outfile.write("\n")

        self._outfile.flush()
        return result0['rfac'], jacobian

    def cleanup(self):
        """
        cleanup: close output file, terminate slave processes
        """
        self._outfile.close()
        for rank in range(1, self.running_slaves + 1):
            self.comm.send(None, dest=rank, tag=TAG_FINISH)
        super(MscMaster, self).cleanup()

class MscSlave(MscProcess):

    def run(self):
        """
        Waits for messages from the master and dispatches tasks.
        """
        s = MPI.Status()
        self.running = True
        while self.running:
            data = self.comm.recv(source=0, tag=MPI.ANY_TAG, status=s)
            if s.tag == TAG_NEW_TASK:
                self.accept_task(data)
            elif s.tag == TAG_FINISH:
                self.running = False

    def accept_task(self, pars):
        """
        Executes a calculation task and returns the result to the master.
        """
        result = self.calc(pars)
        self.comm.send(result, dest=0, tag=TAG_NEW_RESULT)
        self.iteration += 1

def optimize(project):
    """
    main entry point for optimization

    rank 0: starts the calculation, distributes tasks
    ranks 1...N-1: work on assignments from rank 0
    """
    mpi_comm = MPI.COMM_WORLD
    mpi_rank = mpi_comm.Get_rank()

    if mpi_rank == 0:
        master = MscMaster(mpi_comm)
        master.setup(project)
        master.run()
        master.cleanup()
    else:
        slave = MscSlave(mpi_comm)
        slave.setup(project)
        slave.run()
        slave.cleanup()