"""
gradient optimization module for MSC calculations
the module starts multiple MSC calculations and optimizes the model parameters
with a gradient search.
the optimization task is distributed over multiple processes using MPI.
the optimization must be started with N+1 processes in the MPI environment,
where N equals the number of fit parameters.
IMPLEMENTATION IN PROGRESS - DEBUGGING
Requires: scipy, numpy
Author: Matthias Muntwiler
Copyright (c) 2015 by Paul Scherrer Institut
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
"""
import numpy as np
import scipy.optimize as so
import data as md
from mpi4py import MPI
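# Example launch (sketch, not part of this module): for a model with three fit
# parameters the run needs 3 + 1 = 4 MPI processes. Assuming a driver script
# run_fit.py (hypothetical name) that builds a project object and calls
# optimize(project), the job could be started as:
#
#   mpiexec -n 4 python run_fit.py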
# messages sent from master to slaves
# master sends new assignment
# the message is a dictionary of model parameters
TAG_NEW_TASK = 1
# master calls end of calculation
# the message is empty
TAG_FINISH = 2
# master sends current population
# currently not used; distinct value so it cannot be confused with TAG_FINISH
TAG_POPULATION = 3
# messages sent from slaves to master
# slave reports new result
# the message is a dictionary of model parameters and results
TAG_NEW_RESULT = 1
# slave confirms end of calculation
# currently not used
TAG_FINISHED = 2
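# Typical message flow (summary of how the tags are used below): for each
# gradient step the master sends one TAG_NEW_TASK message per fit parameter,
# each slave answers with a TAG_NEW_RESULT message, and when the optimization
# is finished the master sends TAG_FINISH to every slave.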
class MscProcess(object):
"""
Code shared by MscoMaster and MscoSlave
"""
def __init__(self, comm):
self.comm = comm
def setup(self, project):
self.project = project
self.running = False
self.finishing = False
self.iteration = 0
def run(self):
pass
def cleanup(self):
pass
def calc(self, pars):
"""
Executes a single MSC calculation.
pars: A dictionary of parameters expected by the cluster and parameters functions.
returns: pars with three additional values:
rank: rank of the calculation process
index: iteration index of the calculation process
rfac: resulting R-factor
all other calculation results are discarded.
"""
rev = "rank %u, iteration %u" % (self.comm.rank, self.iteration)
# create parameter and cluster structures
clu = self.project.create_cluster(pars)
par = self.project.create_params(pars)
# generate file names
base_filename = "%s_%u_%u" % (self.project.output_file, self.comm.rank, self.iteration)
# call the msc program
result_etpi = self.project.run_calc(par, clu, self.project.scan_file, base_filename, delete_files=True)
# calculate modulation function and R-factor
result_etpi = md.calc_modfunc_lowess(result_etpi)
result_r = md.rfactor(self.project.scan_modf, result_etpi)
pars['rank'] = self.comm.rank
pars['iter'] = self.iteration
pars['rfac'] = result_r
return pars
class MscMaster(MscProcess):
    """
    Master process (rank 0): runs the gradient optimization and dispatches
    the single-point calculations to the slave processes.
    """
def __init__(self, comm):
super(MscMaster, self).__init__(comm)
self.slaves = self.comm.Get_size() - 1
self.running_slaves = 0
def setup(self, project):
super(MscMaster, self).setup(project)
self.dom = project.create_domain()
self.running_slaves = self.slaves
self._outfile = open(self.project.output_file + ".dat", "w")
self._outfile.write("#")
        self._outfile_keys = list(self.dom.start.keys())
        self._outfile_keys.append('rfac')
for name in self._outfile_keys:
self._outfile.write(" " + name)
self._outfile.write("\n")
def run(self):
"""
starts the minimization
"""
# pack initial guess, bounds, constant parameters
nparams = len(self.dom.start)
fit_params = np.zeros((nparams))
params_index = {}
const_params = self.dom.max.copy()
bounds = []
n_fit_params = 0
for key in self.dom.start:
if self.dom.max[key] > self.dom.min[key]:
fit_params[n_fit_params] = self.dom.start[key]
params_index[key] = n_fit_params
n_fit_params += 1
bounds.append((self.dom.min[key], self.dom.max[key]))
fit_params.resize((n_fit_params))
fit_result = so.minimize(self._minfunc, fit_params,
args=(params_index, const_params),
method='L-BFGS-B', jac=True, bounds=bounds)
msc_result = const_params.copy()
for key, index in params_index.items():
msc_result[key] = fit_result.x[index]
msc_result['rfac'] = fit_result.fun
self._outfile.write("# result of gradient optimization\n")
self._outfile.write("# success = {0}, iterations = {1}, calculations = {2}\n".format(fit_result.success, fit_result.nit, fit_result.nfev))
self._outfile.write("# message: {0}\n".format(fit_result.message))
for name in self._outfile_keys:
self._outfile.write(" " + str(msc_result[name]))
self._outfile.write("\n")
def _minfunc(self, fit_params, params_index, const_params):
"""
function to be minimized
fit_params (numpy.ndarray): current fit position
master (MscoMaster): reference to the master process
params_index (dict): dictionary of fit parameters
and their index in fit_params.
key=MSC parameter name, value=index to fit_params.
const_params (dict): dictionary of MSC parameters
holding (at least) the constant parameter values.
a copy of this instance, updated with the current fit position,
is passed to MSC.
"""
# unpack parameters
msc_params = const_params.copy()
for key, index in params_index.items():
msc_params[key] = fit_params[index]
# run MSC calculations
rfac, jac_dict = self.run_msc_calcs(msc_params, params_index)
# pack jacobian
jac_arr = np.zeros_like(fit_params)
for key, index in params_index.items():
jac_arr[index] = jac_dict[key]
return rfac, jac_arr
def run_msc_calcs(self, params, params_index):
"""
params: dictionary of actual parameters
params_index: dictionary of fit parameter indices.
only the keys are used here
to decide for which parameters the derivative is calculated.
returns:
(float) R-factor at the params location
(dict) approximate gradient at the params location
"""
# distribute tasks for gradient
slave_rank = 1
for key in params_index:
params2 = params.copy()
params2[key] += self.dom.step[key]
params2['key'] = key
self.comm.send(params2, dest=slave_rank, tag=TAG_NEW_TASK)
slave_rank += 1
# run calculation for actual position
result0 = self.calc(params)
for name in self._outfile_keys:
self._outfile.write(" " + str(result0[name]))
self._outfile.write("\n")
# gather results
s = MPI.Status()
jacobian = params.copy()
for slave in range(1, slave_rank):
result1 = self.comm.recv(source=MPI.ANY_SOURCE, tag=MPI.ANY_TAG, status=s)
if s.tag == TAG_NEW_RESULT:
key = result1['key']
jacobian[key] = (result1['rfac'] - result0['rfac']) / (result1[key] - result0[key])
for name in self._outfile_keys:
self._outfile.write(" " + str(result1[name]))
self._outfile.write("\n")
self._outfile.flush()
return result0['rfac'], jacobian
def cleanup(self):
"""
cleanup: close output file, terminate slave processes
"""
self._outfile.close()
for rank in range(1, self.running_slaves + 1):
self.comm.send(None, dest=rank, tag=TAG_FINISH)
super(MscMaster, self).cleanup()
class MscSlave(MscProcess):
    """
    Slave process (ranks 1 and higher): executes calculation tasks assigned by the master.
    """
def run(self):
"""
Waits for messages from the master and dispatches tasks.
"""
s = MPI.Status()
self.running = True
while self.running:
data = self.comm.recv(source=0, tag=MPI.ANY_TAG, status=s)
if s.tag == TAG_NEW_TASK:
self.accept_task(data)
elif s.tag == TAG_FINISH:
self.running = False
def accept_task(self, pars):
"""
Executes a calculation task and returns the result to the master.
"""
result = self.calc(pars)
self.comm.send(result, dest=0, tag=TAG_NEW_RESULT)
self.iteration += 1
def optimize(project):
"""
main entry point for optimization
rank 0: starts the calculation, distributes tasks
ranks 1...N-1: work on assignments from rank 0
"""
mpi_comm = MPI.COMM_WORLD
mpi_rank = mpi_comm.Get_rank()
if mpi_rank == 0:
master = MscMaster(mpi_comm)
master.setup(project)
master.run()
master.cleanup()
else:
slave = MscSlave(mpi_comm)
slave.setup(project)
slave.run()
slave.cleanup()
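# Usage sketch (assumption, not part of this module): the project object passed
# to optimize() must provide the attributes and methods used above, i.e.
# create_domain(), create_cluster(pars), create_params(pars), run_calc(...),
# scan_file, scan_modf and output_file. A hypothetical driver could look like:
#
#   from mpi4py import MPI            # initializes MPI on import
#   import msco_gradient              # this module (name assumed)
#   import my_project                 # hypothetical module defining the project
#
#   if __name__ == '__main__':
#       project = my_project.Project()
#       msco_gradient.optimize(project)
#
# started with one MPI process per fit parameter plus the master (see the
# example launch command near the top of the file).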