""" gradient optimization module for MSC calculations the module starts multiple MSC calculations and optimizes the model parameters with a gradient search. the optimization task is distributed over multiple processes using MPI. the optimization must be started with N+1 processes in the MPI environment, where N equals the number of fit parameters. IMPLEMENTATION IN PROGRESS - DEBUGGING Requires: scipy, numpy Author: Matthias Muntwiler Copyright (c) 2015 by Paul Scherrer Institut Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 """ import numpy as np import scipy.optimize as so import data as md from mpi4py import MPI # messages sent from master to slaves # master sends new assignment # the message is a dictionary of model parameters TAG_NEW_TASK = 1 # master calls end of calculation # the message is empty TAG_FINISH = 2 # master sends current population # currently not used TAG_POPULATION = 2 # messages sent from slaves to master # slave reports new result # the message is a dictionary of model parameters and results TAG_NEW_RESULT = 1 # slave confirms end of calculation # currently not used TAG_FINISHED = 2 class MscProcess(object): """ Code shared by MscoMaster and MscoSlave """ def __init__(self, comm): self.comm = comm def setup(self, project): self.project = project self.running = False self.finishing = False self.iteration = 0 def run(self): pass def cleanup(self): pass def calc(self, pars): """ Executes a single MSC calculation. pars: A dictionary of parameters expected by the cluster and parameters functions. returns: pars with three additional values: rank: rank of the calculation process index: iteration index of the calculation process rfac: resulting R-factor all other calculation results are discarded. """ rev = "rank %u, iteration %u" % (self.comm.rank, self.iteration) # create parameter and cluster structures clu = self.project.create_cluster(pars) par = self.project.create_params(pars) # generate file names base_filename = "%s_%u_%u" % (self.project.output_file, self.comm.rank, self.iteration) # call the msc program result_etpi = self.project.run_calc(par, clu, self.project.scan_file, base_filename, delete_files=True) # calculate modulation function and R-factor result_etpi = md.calc_modfunc_lowess(result_etpi) result_r = md.rfactor(self.project.scan_modf, result_etpi) pars['rank'] = self.comm.rank pars['iter'] = self.iteration pars['rfac'] = result_r return pars class MscMaster(MscProcess): def __init__(self, comm): super(MscMaster, self).__init__(comm) self.slaves = self.comm.Get_size() - 1 self.running_slaves = 0 def setup(self, project): super(MscMaster, self).setup(project) self.dom = project.create_domain() self.running_slaves = self.slaves self._outfile = open(self.project.output_file + ".dat", "w") self._outfile.write("#") self._outfile_keys = self.dom.start.keys() self._outfile_keys.append('rfac') for name in self._outfile_keys: self._outfile.write(" " + name) self._outfile.write("\n") def run(self): """ starts the minimization """ # pack initial guess, bounds, constant parameters nparams = len(self.dom.start) fit_params = np.zeros((nparams)) params_index = {} const_params = self.dom.max.copy() bounds = [] n_fit_params = 0 for key in self.dom.start: if self.dom.max[key] > self.dom.min[key]: fit_params[n_fit_params] = self.dom.start[key] params_index[key] = n_fit_params n_fit_params += 1 bounds.append((self.dom.min[key], self.dom.max[key])) fit_params.resize((n_fit_params)) fit_result = so.minimize(self._minfunc, fit_params, args=(params_index, const_params), method='L-BFGS-B', jac=True, bounds=bounds) msc_result = const_params.copy() for key, index in params_index.items(): msc_result[key] = fit_result.x[index] msc_result['rfac'] = fit_result.fun self._outfile.write("# result of gradient optimization\n") self._outfile.write("# success = {0}, iterations = {1}, calculations = {2}\n".format(fit_result.success, fit_result.nit, fit_result.nfev)) self._outfile.write("# message: {0}\n".format(fit_result.message)) for name in self._outfile_keys: self._outfile.write(" " + str(msc_result[name])) self._outfile.write("\n") def _minfunc(self, fit_params, params_index, const_params): """ function to be minimized fit_params (numpy.ndarray): current fit position master (MscoMaster): reference to the master process params_index (dict): dictionary of fit parameters and their index in fit_params. key=MSC parameter name, value=index to fit_params. const_params (dict): dictionary of MSC parameters holding (at least) the constant parameter values. a copy of this instance, updated with the current fit position, is passed to MSC. """ # unpack parameters msc_params = const_params.copy() for key, index in params_index.items(): msc_params[key] = fit_params[index] # run MSC calculations rfac, jac_dict = self.run_msc_calcs(msc_params, params_index) # pack jacobian jac_arr = np.zeros_like(fit_params) for key, index in params_index.items(): jac_arr[index] = jac_dict[key] return rfac, jac_arr def run_msc_calcs(self, params, params_index): """ params: dictionary of actual parameters params_index: dictionary of fit parameter indices. only the keys are used here to decide for which parameters the derivative is calculated. returns: (float) R-factor at the params location (dict) approximate gradient at the params location """ # distribute tasks for gradient slave_rank = 1 for key in params_index: params2 = params.copy() params2[key] += self.dom.step[key] params2['key'] = key self.comm.send(params2, dest=slave_rank, tag=TAG_NEW_TASK) slave_rank += 1 # run calculation for actual position result0 = self.calc(params) for name in self._outfile_keys: self._outfile.write(" " + str(result0[name])) self._outfile.write("\n") # gather results s = MPI.Status() jacobian = params.copy() for slave in range(1, slave_rank): result1 = self.comm.recv(source=MPI.ANY_SOURCE, tag=MPI.ANY_TAG, status=s) if s.tag == TAG_NEW_RESULT: key = result1['key'] jacobian[key] = (result1['rfac'] - result0['rfac']) / (result1[key] - result0[key]) for name in self._outfile_keys: self._outfile.write(" " + str(result1[name])) self._outfile.write("\n") self._outfile.flush() return result0['rfac'], jacobian def cleanup(self): """ cleanup: close output file, terminate slave processes """ self._outfile.close() for rank in range(1, self.running_slaves + 1): self.comm.send(None, dest=rank, tag=TAG_FINISH) super(MscMaster, self).cleanup() class MscSlave(MscProcess): def run(self): """ Waits for messages from the master and dispatches tasks. """ s = MPI.Status() self.running = True while self.running: data = self.comm.recv(source=0, tag=MPI.ANY_TAG, status=s) if s.tag == TAG_NEW_TASK: self.accept_task(data) elif s.tag == TAG_FINISH: self.running = False def accept_task(self, pars): """ Executes a calculation task and returns the result to the master. """ result = self.calc(pars) self.comm.send(result, dest=0, tag=TAG_NEW_RESULT) self.iteration += 1 def optimize(project): """ main entry point for optimization rank 0: starts the calculation, distributes tasks ranks 1...N-1: work on assignments from rank 0 """ mpi_comm = MPI.COMM_WORLD mpi_rank = mpi_comm.Get_rank() if mpi_rank == 0: master = MscMaster(mpi_comm) master.setup(project) master.run() master.cleanup() else: slave = MscSlave(mpi_comm) slave.setup(project) slave.run() slave.cleanup()