400 lines
16 KiB
Python
Executable File
400 lines
16 KiB
Python
Executable File
#!/usr/bin/env python
|
|
|
|
"""
|
|
@package pmsco.pmsco
|
|
PEARL Multiple-Scattering Calculation and Structural Optimization
|
|
|
|
this is the top-level interface of the PMSCO package.
|
|
all calculations (any mode, any project) start by calling the run_project() function of this module.
|
|
the module also provides a command line parser for common options.
|
|
|
|
for parallel execution, prefix the command line with mpi_exec -np NN, where NN is the number of processes to use.
|
|
note that in parallel mode, one process takes the role of the coordinator (master).
|
|
the master does not run calculations and is idle most of the time.
|
|
to benefit from parallel execution on a work station, NN should be the number of processors plus one.
|
|
on a cluster, the number of processes is chosen according to the available resources.
|
|
|
|
all calculations can also be run in a single process.
|
|
PMSCO serializes the calculations automatically.
|
|
|
|
the code of the main module is independent of a particular calculation project.
|
|
all project-specific code must be in a separate python module.
|
|
the project module must implement a class derived from pmsco.project.Project,
|
|
and call run_project() with an instance of the project class.
|
|
refer to the projects folder for examples.
|
|
|
|
@author Matthias Muntwiler, matthias.muntwiler@psi.ch
|
|
|
|
@copyright (c) 2015-18 by Paul Scherrer Institut @n
|
|
Licensed under the Apache License, Version 2.0 (the "License"); @n
|
|
you may not use this file except in compliance with the License.
|
|
You may obtain a copy of the License at
|
|
http://www.apache.org/licenses/LICENSE-2.0
|
|
"""
|
|
|
|
from __future__ import absolute_import
|
|
from __future__ import division
|
|
from __future__ import print_function
|
|
|
|
import argparse
|
|
from builtins import range
|
|
import datetime
|
|
import logging
|
|
import importlib
|
|
import os.path
|
|
import sys
|
|
|
|
from mpi4py import MPI
|
|
|
|
import pmsco.dispatch as dispatch
|
|
import pmsco.files as files
|
|
import pmsco.handlers as handlers
|
|
from pmsco.optimizers import genetic, swarm, grid, table
|
|
|
|
# the module-level logger
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
def setup_logging(enable=False, filename="pmsco.log", level="WARNING"):
    """
    configure the root logger to write either to a log file or to a null handler.

    call this function once before the first logging command is issued,
    regardless of whether log output is wanted.
    with enable=False (the default), a null handler is installed and no log file is produced.

    other modules are expected to create their own logger with
    @code logger = logging.getLogger(__name__) @endcode
    and to log through it, e.g.
    @code logger.debug(message) @endcode.

    @param enable: (bool) True = log to the file given by filename,
        False = install a null handler (no log output).
    @param filename: (string) path and name of the log file.
        if the MPI world communicator has more than one process,
        a dot and the MPI rank of this process are inserted before the extension.
    @param level: (string) name of the log level (case-insensitive).
        one of "DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL".

    @raise ValueError if level is empty or does not name an integer log level.

    @return None
    """
    numeric_level = getattr(logging, level.upper(), None)
    if not isinstance(numeric_level, int):
        raise ValueError('Invalid log level: %s' % level)

    # the empty name addresses the root logger (not this module's logger).
    root_logger = logging.getLogger("")
    root_logger.setLevel(numeric_level)

    if not enable:
        root_logger.addHandler(logging.NullHandler())
        return

    # one log file per process: tag the file name with the MPI rank.
    comm = MPI.COMM_WORLD
    if comm.Get_size() > 1:
        stem, suffix = os.path.splitext(filename)
        filename = stem + "." + str(comm.Get_rank()) + suffix

    # delay=True: the file is not opened until the first record is emitted.
    file_handler = logging.FileHandler(filename, mode="w", delay=True)
    file_handler.setLevel(numeric_level)
    file_handler.setFormatter(
        logging.Formatter('%(asctime)s (%(name)s) %(levelname)s: %(message)s'))
    root_logger.addHandler(file_handler)
|
|
|
|
|
|
def set_common_args(project, args):
    """
    transfer the common command line arguments to the logger and to the project.

    run_project() receives the project instance as its only argument.
    every project-related setting from the command line must therefore be copied
    into the project object here. the logging-related settings are passed to setup_logging().

    @param project: project instance to configure.
        the function sets data_dir, mode, optimizer_params, files.categories_to_delete,
        keep_levels and keep_best, and calls set_output() and set_timedelta_limit().

    @param args: a namespace object containing the necessary parameters.
        this can be an instance of Args, the namespace returned by parse_cli(),
        or any object which has the same attributes as the Args class.

    @return: None
    """
    if args.data_dir:
        project.data_dir = args.data_dir

    # log file precedence: explicit log file > name derived from output file > default name.
    log_file = "pmsco.log"
    if args.output_file:
        project.set_output(args.output_file)
        log_file = args.output_file + ".log"
    if args.log_file:
        log_file = args.log_file
    setup_logging(enable=args.log_enable, filename=log_file, level=args.log_level)

    logger.debug("creating project")
    mode = args.mode.lower()
    if mode not in {'single', 'grid', 'swarm', 'genetic', 'table'}:
        # an invalid mode is only reported; the project keeps its previous mode.
        logger.error("invalid optimization mode '%s'.", mode)
    else:
        project.mode = mode

    # optimizer parameters are copied only if they are set (non-zero, non-empty).
    for key in ('pop_size', 'seed_file', 'seed_limit', 'table_file'):
        value = getattr(args, key)
        if value:
            project.optimizer_params[key] = value

    if args.time_limit:
        project.set_timedelta_limit(datetime.timedelta(hours=args.time_limit))

    keep = args.keep_files
    if keep:
        if "all" in keep:
            to_delete = set([])
        else:
            # delete everything that was not requested, but never the report files,
            # and never the model files of a single calculation.
            to_delete = files.FILE_CATEGORIES - set(keep)
            to_delete -= {'report'}
            if mode == 'single':
                to_delete -= {'model'}
        project.files.categories_to_delete = to_delete

    # the command line can only raise these limits, not lower the project's own values.
    for attr in ('keep_levels', 'keep_best'):
        requested = getattr(args, attr)
        if requested > getattr(project, attr):
            setattr(project, attr, requested)
|
|
|
|
|
|
def run_project(project):
    """
    run a calculation project.

    selects the model handler class according to project.mode,
    selects the region handler class via handlers.choose_region_handler_class(),
    and hands the project over to dispatch.run_calculations().

    if the mode does not map to a handler class, an error is logged and no calculation is started.
    exceptions raised by the calculations are logged and re-raised.

    @param project: project instance, configured and ready to run.

    @return: None
    """
    # log project arguments only in rank 0
    if MPI.COMM_WORLD.Get_rank() == 0:
        project.log_project_args()

    model_handlers = {
        'single': handlers.SingleModelHandler,
        'grid': grid.GridSearchHandler,
        'swarm': swarm.ParticleSwarmHandler,
        'genetic': genetic.GeneticOptimizationHandler,
        'table': table.TableModelHandler,
    }
    optimizer_class = model_handlers.get(project.mode)
    if optimizer_class is None:
        if project.mode == 'gradient':
            logger.error("gradient search not implemented")
            # TODO: implement gradient search
            # optimizer_class = gradient.GradientSearchHandler
        else:
            logger.error("invalid optimization mode '%s'.", project.mode)

    project.handler_classes['model'] = optimizer_class
    project.handler_classes['region'] = handlers.choose_region_handler_class(project)

    if not (project and optimizer_class):
        logger.error("undefined project, optimizer, or calculator.")
        return

    logger.info("starting calculations")
    try:
        dispatch.run_calculations(project)
    except Exception:
        # SystemExit and KeyboardInterrupt do not derive from Exception
        # and pass through without being logged.
        logger.exception("unhandled exception during calculations.")
        raise
    else:
        logger.info("calculations complete")
|
|
|
|
|
|
class Args(object):
    """
    container for the common arguments of a calculation run.

    an instance of this class can be passed to set_common_args()
    in place of the namespace produced by the command line parser,
    e.g. when a calculation is set up from code rather than from the command line.

    get_cli_parser() takes its default values from an instance of this class,
    so the attribute defaults defined here are also the command line defaults.
    """

    def __init__(self, mode="single", output_file="pmsco_data"):
        """
        initialize all attributes with their default values.

        the attributes correspond to the options of the command line interface.
        further attributes may have to be changed after construction,
        depending on the project and the calculation mode.

        @param mode: (string) calculation mode.
        @param output_file: (string) base path for intermediate and output files.
        """
        # calculation mode and optimizer settings
        self.mode = mode
        self.pop_size = 0
        self.seed_file = ""
        self.seed_limit = 0
        self.table_file = ""

        # input and output locations, wall time limit in hours
        self.data_dir = ""
        self.output_file = output_file
        self.time_limit = 24.0

        # which result files to keep after the calculation
        self.keep_files = files.FILE_CATEGORIES_TO_KEEP
        self.keep_best = 10
        self.keep_levels = 1

        # logging
        self.log_enable = True
        self.log_level = "WARNING"
        self.log_file = ""
|
|
|
|
|
|
def get_cli_parser(default_args=None):
    """
    create the command line parser for the common PMSCO options.

    all options are optional and accepted regardless of the calculation mode.
    the parser does not check whether the options required by a particular mode are present.

    @param default_args: object which provides the default value of each option
        as an attribute (same attributes as the Args class).
        if None (or otherwise false), a new Args() instance is used.

    @return: argparse.ArgumentParser object.
        the project module path is the only positional argument.
    """
    if not default_args:
        default_args = Args()

    KEEP_FILES_CHOICES = files.FILE_CATEGORIES | {'all'}

    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        description="""
        multiple-scattering calculations and optimization

        you must call pmsco.py from a project file which defines the calculation project.
        the project file must be a regular Python module and define:

        1) a project class derived from pmsco.project.Project.
        the class implements/overrides all necessary methods of the calculation project,
        in particular create_domain, create_cluster, and create_params.

        2) a global function named create_project.
        the function accepts a namespace object from the argument parser.
        it may evaluate extra, project-specific arguments.
        it does not need to evaluate the common parameters described below.
        the function must return an instance of the project class described above.

        3) main code that parses the command line and calls pmsco.pmsco.main_pmsco().
        (see the projects folder for examples).
        """)
    # the required argument list may depend on the calculation mode.
    # for simplicity, the parser does not check these requirements.
    # all parameters are optional and accepted regardless of mode.
    # errors may occur if implicit requirements are not met.
    parser.add_argument('project_module',
                        help="path to custom module that defines the calculation project")
    parser.add_argument('-m', '--mode', default=default_args.mode,
                        choices=['single', 'grid', 'swarm', 'genetic', 'table'],
                        help='calculation mode')
    parser.add_argument('--pop-size', type=int, default=default_args.pop_size,
                        help='population size (number of particles) in swarm or genetic optimization mode. '
                             'default is the greater of 4 or the number of calculation processes.')
    # seed-file and table-file take their defaults from default_args like all other options,
    # so that the parser defaults agree with the Args class.
    parser.add_argument('--seed-file', default=default_args.seed_file,
                        help='path and name of population seed file. '
                             'population data of previous optimizations can be used to seed a new optimization. '
                             'the file must have the same structure as the .pop or .dat files.')
    parser.add_argument('--seed-limit', type=int, default=default_args.seed_limit,
                        help='maximum number of models to use from the seed file. '
                             'the models with the best R-factors are selected.')
    parser.add_argument('-d', '--data-dir', default=default_args.data_dir,
                        help='directory path for experimental data files (if required by project). '
                             'default: working directory')
    parser.add_argument('-o', '--output-file', default=default_args.output_file,
                        help='base path for intermediate and output files.')
    parser.add_argument('--table-file', default=default_args.table_file,
                        help='path and name of population table file for table optimization mode. '
                             'the file must have the same structure as the .pop or .dat files.')
    parser.add_argument('-k', '--keep-files', nargs='*', default=default_args.keep_files,
                        choices=KEEP_FILES_CHOICES,
                        help='output file categories to keep after the calculation. '
                             'by default, cluster and model (simulated data) '
                             'of a limited number of best models are kept.')
    parser.add_argument('--keep-best', type=int, default=default_args.keep_best,
                        help='number of best models for which to keep result files '
                             '(at each node from root down to keep-levels).')
    parser.add_argument('--keep-levels', type=int, choices=range(5),
                        default=default_args.keep_levels,
                        help='task level down to which result files of best models are kept. '
                             '0 = model, 1 = scan, 2 = symmetry, 3 = emitter, 4 = region.')
    parser.add_argument('-t', '--time-limit', type=float, default=default_args.time_limit,
                        help='wall time limit in hours. the optimizers try to finish before the limit.')
    parser.add_argument('--log-file', default=default_args.log_file,
                        help='name of the main log file. '
                             'under MPI, the rank of the process is inserted before the extension.')
    parser.add_argument('--log-level', default=default_args.log_level,
                        help='minimum level of log messages. DEBUG, INFO, WARNING, ERROR, CRITICAL.')
    # --log-enable and --log-disable write to the same destination and exclude each other.
    feature_parser = parser.add_mutually_exclusive_group(required=False)
    feature_parser.add_argument('--log-enable', dest='log_enable', action="store_true",
                                help="enable logging. by default, logging is on.")
    feature_parser.add_argument('--log-disable', dest='log_enable', action='store_false',
                                help="disable logging. by default, logging is on.")
    parser.set_defaults(log_enable=default_args.log_enable)

    return parser
|
|
|
|
|
|
def parse_cli():
    """
    parse the command line.

    the common options are parsed by the parser from get_cli_parser() with the defaults of Args().
    options which the parser does not recognize are not an error;
    they are returned separately so that the project module can evaluate them.

    @return: tuple (args, unknown_args).
        args is the Namespace object created by the argument parser,
        unknown_args is the list of remaining, unrecognized argument strings.
    """
    cli_parser = get_cli_parser(Args())
    known, remaining = cli_parser.parse_known_args()
    return known, remaining
|
|
|
|
|
|
def import_project_module(path):
    """
    import the custom project module given its file path.

    the directory part of the path is expanded to its absolute form
    and appended to the python path unless it is already listed there.
    the module is then imported by its name.

    @param path: path and name of the module to be loaded.
        the directory part is optional. if it is missing, the current working directory is used.
        if the name includes an extension, it is stripped off.

    @return: the loaded module as a python object
    """
    directory, file_name = os.path.split(path)
    module_name, __ = os.path.splitext(file_name)
    directory = os.path.abspath(directory)
    # repeated calls must not pile up duplicate entries in the python path.
    if directory not in sys.path:
        sys.path.append(directory)
    return importlib.import_module(module_name)
|
|
|
|
|
|
def main():
    """
    command line entry point: parse the arguments, set up the project and run it.

    the project module named on the command line is imported and must define
    a function create_project() which returns the project instance.

    the project module may optionally define:
    - parse_project_args(unknown_args): parses the arguments which the common parser did not recognize.
    - set_project_args(project, project_args): applies the result of parse_project_args to the project.
      project_args is None if parse_project_args is not defined.

    @return: None
    """
    args, unknown_args = parse_cli()
    if not args:
        return

    module = import_project_module(args.project_module)

    # the hooks are optional. a missing module attribute raises AttributeError (not NameError),
    # so the hooks are looked up explicitly. errors raised inside a hook are not suppressed.
    parse_project_args = getattr(module, "parse_project_args", None)
    if parse_project_args is not None:
        project_args = parse_project_args(unknown_args)
    else:
        project_args = None

    project = module.create_project()
    set_common_args(project, args)

    set_project_args = getattr(module, "set_project_args", None)
    if set_project_args is not None:
        set_project_args(project, project_args)

    run_project(project)
|
|
|
|
|
|
# script entry point: run the calculation and report success to the calling shell.
if '__main__' == __name__:
    main()
    raise SystemExit(0)
|