382 lines
14 KiB
Python
Executable File
382 lines
14 KiB
Python
Executable File
#!/usr/bin/env python
|
|
|
|
"""
|
|
@package pmsco.pmsco
|
|
PEARL Multiple-Scattering Calculation and Structural Optimization
|
|
|
|
this is the main entry point and top-level interface of the PMSCO package.
|
|
all calculations (any mode, any project) start by calling the main_pmsco() function of this module.
|
|
the module also provides a command line parser.
|
|
|
|
command line usage: call with -h option to see the list of arguments.
|
|
|
|
python usage: call main_pmsco() with suitable arguments.
|
|
|
|
for parallel execution, prefix the command line with mpi_exec -np NN, where NN is the number of processes to use.
|
|
note that in parallel mode, one process takes the role of the coordinator (master).
|
|
the master does not run calculations and is idle most of the time.
|
|
to benefit from parallel execution on a work station, NN should be the number of processors plus one.
|
|
on a cluster, the number of processes is chosen according to the available resources.
|
|
|
|
all calculations can also be run in a single process.
|
|
PMSCO serializes the calculations automatically.
|
|
|
|
the code of the main module is independent of a particular calculation project.
|
|
all project-specific code must be in a separate python module.
|
|
the project module must implement a class derived from pmsco.project.Project,
|
|
and a global function create_project which returns a new instance of the derived project class.
|
|
refer to the projects folder for examples.
|
|
|
|
@pre
|
|
* python 2.7, including python-pip
|
|
* numpy
|
|
* nose from Debian python-nose
|
|
* statsmodels from Debian python-statsmodels, or PyPI (https://pypi.python.org/pypi/statsmodels)
|
|
* periodictable from PyPI (https://pypi.python.org/pypi/periodictable)
|
|
* mpi4py from PyPI (the Debian package may have a bug causing the program to crash)
|
|
* OpenMPI, including libopenmpi-dev
|
|
* SWIG from Debian swig
|
|
|
|
to install a PyPI package, e.g. periodictable, do
|
|
@code{.sh}
|
|
pip install --user periodictable
|
|
@endcode
|
|
|
|
@author Matthias Muntwiler, matthias.muntwiler@psi.ch
|
|
|
|
@copyright (c) 2015 by Paul Scherrer Institut @n
|
|
Licensed under the Apache License, Version 2.0 (the "License"); @n
|
|
you may not use this file except in compliance with the License.
|
|
You may obtain a copy of the License at
|
|
http://www.apache.org/licenses/LICENSE-2.0
|
|
"""
|
|
|
|
import os.path
|
|
import sys
|
|
import datetime
|
|
import argparse
|
|
import logging
|
|
import cluster
|
|
import dispatch
|
|
import handlers
|
|
import files
|
|
import calculator
|
|
import swarm
|
|
import grid
|
|
# import gradient
|
|
from mpi4py import MPI
|
|
|
|
# the module-level logger
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
def setup_logging(enable=False, filename="pmsco.log", level="WARNING"):
    """
    configure the root logger to write either to a log file or to a null handler.

    call this function before the first logging command is issued,
    regardless of whether log output is wanted or not.
    with enable=False (the default), a null handler is installed and no log is produced.

    other modules are expected to create their own logger at module level with
    @code logger = logging.getLogger(__name__) @endcode
    and to send their messages through it, e.g.
    @code logger.debug(message) @endcode.

    @param enable: (bool) True = write the log to the file named by filename,
        False = install a null handler (no log output).
    @param filename: (string) path and name of the log file.
        if the MPI world communicator holds more than one process,
        a dot and the MPI rank of this process are inserted before the extension.
        the file is opened in write mode, and only when the first record is emitted.
    @param level: (string) name of the log level (converted to upper case before look-up),
        e.g. "DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL".
    @return None
    @raise ValueError if level is empty or is not the name of a log level.
    """
    # the level names are integer constants of the logging module
    threshold = getattr(logging, level.upper(), None)
    if not isinstance(threshold, int):
        raise ValueError('Invalid log level: %s' % level)

    root_logger = logging.getLogger("")
    root_logger.setLevel(threshold)

    if not enable:
        # logging is switched off: swallow all records
        root_logger.addHandler(logging.NullHandler())
        return

    world = MPI.COMM_WORLD
    if world.Get_size() > 1:
        # one log file per process: <root>.<rank><ext>
        rank = world.Get_rank()
        base, extension = os.path.splitext(filename)
        filename = base + "." + str(rank) + extension

    file_handler = logging.FileHandler(filename, mode="w", delay=True)
    file_handler.setLevel(threshold)
    file_handler.setFormatter(logging.Formatter('%(asctime)s (%(name)s) %(levelname)s: %(message)s'))
    root_logger.addHandler(file_handler)
|
|
|
|
|
|
def set_common_args(project, args):
    """
    copy the common command line arguments to their destinations.

    the arguments are distributed to two destinations:
    the logging set-up (see setup_logging()) and the project instance.
    run_project() receives the project instance as its only argument,
    so everything it needs from the command line has to be stored in the project here.

    the log file name is chosen in this order of precedence:
    args.log_file, args.output_file + ".log", "pmsco.log".

    an unknown mode or code is reported to the log as an error;
    the corresponding project attribute is left unchanged in that case.

    @param project: project instance that receives the settings.
        the function writes data_dir, mode, pop_size, code and files.categories_to_delete,
        and calls set_output() and set_timedelta_limit().

    @param args: a namespace object containing the necessary parameters.
        this can be an instance of Args, or the return value of parse_cli(),
        or any object which has the same attributes as the Args class.

    @return: None
    """
    log_path = "pmsco.log"

    if args.data_dir:
        project.data_dir = args.data_dir
    if args.output_file:
        project.set_output(args.output_file)
        log_path = args.output_file + ".log"
    if args.log_file:
        # an explicit log file name overrides the one derived from the output file
        log_path = args.log_file
    # logging must be configured before the first message is sent
    setup_logging(enable=args.log_enable, filename=log_path, level=args.log_level)

    logger.debug("creating project")
    mode = args.mode.lower()
    if mode not in ('single', 'grid', 'swarm'):
        logger.error("invalid optimization mode '%s'.", mode)
    else:
        project.mode = mode

    if args.pop_size:
        project.pop_size = args.pop_size

    code = args.code.lower()
    if code not in ('edac', 'msc', 'test'):
        logger.error("invalid code argument")
    else:
        project.code = code

    if args.time_limit:
        limit = datetime.timedelta(hours=args.time_limit)
        project.set_timedelta_limit(limit)

    keep = args.keep_files
    if keep:
        if "all" in keep:
            # keep everything: nothing is scheduled for deletion
            doomed = set()
        else:
            # delete every category that was not asked for, but never the report files
            doomed = files.FILE_CATEGORIES - set(keep)
            doomed -= {'report'}
            if mode == 'single':
                # in single mode, the model files are kept as well
                doomed -= {'model'}
        project.files.categories_to_delete = doomed
|
|
|
|
|
|
def log_project_args(project):
    """
    write the common project settings to the log at INFO level.

    reported are the scattering code, optimization mode, swarm size,
    data directory, output file, and the file categories that are kept
    (all categories minus project.files.categories_to_delete).

    if the project lacks one of these attributes, the remaining items are skipped
    and a warning is logged instead.

    @param project: project instance (sub-class of pmsco.project.Project).
    @return: None
    """
    # (message template, name of the project attribute), reported in this order
    report_items = (
        ("scattering code: {0}", "code"),
        ("optimization mode: {0}", "mode"),
        ("minimum swarm size: {0}", "pop_size"),
        ("data directory: {0}", "data_dir"),
        ("output file: {0}", "output_file"),
    )
    try:
        for template, attribute in report_items:
            logger.info(template.format(getattr(project, attribute)))

        kept_categories = files.FILE_CATEGORIES - project.files.categories_to_delete
        logger.info("intermediate files to keep: {0}".format(", ".join(kept_categories)))
    except AttributeError:
        logger.warning("AttributeError in log_project_args")
|
|
|
|
|
|
def run_project(project):
    """
    run a calculation project.

    the function
    1. logs the project settings (log_project_args()),
    2. stores the model handler class selected by project.mode in project.handler_classes['model'],
    3. stores the region handler class returned by handlers.choose_region_handler_class()
       in project.handler_classes['region'],
    4. sets project.cluster_format and project.calculator_class according to project.code,
    5. starts the calculations via dispatch.run_calculations().

    the calculator interface modules for 'edac' and 'msc' are imported only when they are selected.
    if the mode or the code is not recognized (or the mode is 'gradient', which is not implemented),
    an error is logged, the respective class is stored as None, and no calculation is started.

    @param project: project instance. must provide the attributes mode, code and handler_classes.

    @return: None

    @raise Exception: any exception raised during the calculations is logged and re-raised.
    """
    log_project_args(project)

    mode = project.mode
    optimizer_class = None
    if mode == 'swarm':
        optimizer_class = swarm.ParticleSwarmHandler
    elif mode == 'grid':
        optimizer_class = grid.GridSearchHandler
    elif mode == 'single':
        optimizer_class = handlers.SingleModelHandler
    elif mode == 'gradient':
        logger.error("gradient search not implemented")
        # TODO: implement gradient search
        # optimizer_class = gradient.GradientSearchHandler
    else:
        logger.error("invalid optimization mode '%s'.", mode)
    project.handler_classes['model'] = optimizer_class

    project.handler_classes['region'] = handlers.choose_region_handler_class(project)

    code = project.code
    calculator_class = None
    if code == 'test':
        logger.debug("importing TEST interface")
        project.cluster_format = cluster.FMT_EDAC
        calculator_class = calculator.TestCalculator
    elif code == 'msc':
        logger.debug("importing MSC interface")
        import msc_calculator
        project.cluster_format = cluster.FMT_MSC
        calculator_class = msc_calculator.MscCalculator
    elif code == 'edac':
        logger.debug("importing EDAC interface")
        import edac_calculator
        project.cluster_format = cluster.FMT_EDAC
        calculator_class = edac_calculator.EdacCalculator
    else:
        logger.error("invalid code argument")
    project.calculator_class = calculator_class

    if not (project and optimizer_class and calculator_class):
        logger.error("undefined project, optimizer, or calculator.")
        return

    logger.info("starting calculations")
    try:
        dispatch.run_calculations(project)
    except Exception:
        # SystemExit and KeyboardInterrupt do not derive from Exception
        # and pass through without being logged.
        logger.exception("unhandled exception during calculations.")
        raise
    logger.info("calculations complete")
|
|
|
|
|
|
class Args(object):
    """
    container for the arguments of the main function.

    an instance of this class can be passed wherever the namespace object
    of the command line parser is expected.
    it is the programmatic alternative to parsing a command line.

    attributes that are not constructor parameters are initialized to fixed values
    and can be changed after construction.
    """

    def __init__(self, mode="single", code="edac", output_file=""):
        """
        initialize all attributes.

        the parameters have the same meaning as the corresponding command line arguments.
        further attributes may have to be set depending on the project
        and/or the calculation mode.

        @param mode: (string) calculation mode. default "single".
        @param code: (string) scattering code. default "edac".
        @param output_file: (string) base path for output files. default: empty string.
        """
        # calculation set-up
        self.mode = mode
        self.code = code
        self.pop_size = 0
        self.time_limit = 24.0

        # files and directories (data_dir is the working directory at construction time)
        self.data_dir = os.getcwd()
        self.output_file = output_file
        self.keep_files = []

        # logging
        self.log_enable = True
        self.log_level = "WARNING"
        self.log_file = ""
|
|
|
|
|
|
def get_cli_parser(default_args=None):
    """
    create the parser for the common command line arguments.

    the parser does not check whether the combination of arguments is sufficient
    for the selected calculation mode.
    all arguments are optional and accepted regardless of mode.

    @param default_args: object that supplies the default values of the arguments,
        normally an instance of Args.
        if omitted (or false), a new Args object is used.
        attributes that the object does not have fall back to the built-in defaults.
        the defaults of --output-file and --keep-files are not taken from this object.

    @return: (argparse.ArgumentParser) the configured parser.
    """
    if not default_args:
        default_args = Args()

    keep_files_choices = files.FILE_CATEGORIES | {'all'}

    parser = argparse.ArgumentParser(
        description="""
        multiple-scattering calculations and optimization

        you must call pmsco.py from a project file which defines the calculation project.
        the project file must be a regular Python module and define:

        1) a project class derived from pmsco.project.Project.
        the class implements/overrides all necessary methods of the calculation project,
        in particular create_domain, create_cluster, and create_params.

        2) a global function named create_project.
        the function accepts a namespace object from the argument parser.
        it may evaluate extra, project-specific arguments.
        it does not need to evaluate the common parameters described below.
        the function must return an instance of the project class described above.

        3) main code that parses the command line and calls pmsco.pmsco.main_pmsco().
        (see the projects folder for examples).
        """)
    # the required argument list may depend on the calculation mode.
    # for simplicity, the parser does not check these requirements.
    # all parameters are optional and accepted regardless of mode.
    # errors may occur if implicit requirements are not met.

    # the defaults are taken from default_args so that a customized Args object
    # is honoured consistently for all arguments, not only for the log options.
    # getattr with a fall-back keeps objects working that define the log attributes only.
    parser.add_argument('-m', '--mode', default=getattr(default_args, 'mode', 'single'),
                        choices=['single', 'grid', 'swarm', 'gradient'],
                        help='calculation mode')
    parser.add_argument('--pop-size', type=int, default=getattr(default_args, 'pop_size', 0),
                        help='population size (number of particles) in swarm optimization mode. ' +
                             'default is the greater of 4 or two times the number of calculation processes.')
    parser.add_argument('-c', '--code', choices=['msc', 'edac', 'test'],
                        default=getattr(default_args, 'code', 'edac'),
                        help='scattering code (default: %(default)s)')
    parser.add_argument('-d', '--data-dir', default=getattr(default_args, 'data_dir', os.getcwd()),
                        help='directory path for experimental data files (if required by project). ' +
                             'default: working directory')
    # NOTE(review): the default stays None (not default_args.output_file)
    # in case project code distinguishes None from an empty string - confirm.
    parser.add_argument('-o', '--output-file',
                        help='base path for intermediate and output files. ' +
                             'default: pmsco_data')
    parser.add_argument('-k', '--keep-files', nargs='*', default=files.FILE_CATEGORIES_TO_KEEP,
                        choices=keep_files_choices,
                        help='output file categories to keep after the calculation. '
                             'by default, cluster and model (simulated data) '
                             'of a limited number of best models are kept.')
    parser.add_argument('-t', '--time-limit', type=float,
                        default=getattr(default_args, 'time_limit', 24.0),
                        help='wall time limit in hours. the optimizers try to finish before the limit. ' +
                             'default: %(default)s.')
    parser.add_argument('--log-file', default=default_args.log_file,
                        help='name of the main log file. ' +
                             'under MPI, the rank of the process is inserted before the extension. ' +
                             'defaults: output file + log, or pmsco.log.')
    parser.add_argument('--log-level', default=default_args.log_level,
                        help='minimum level of log messages. DEBUG, INFO, WARNING, ERROR, CRITICAL. default: WARNING.')

    # --log-enable and --log-disable write to the same destination and exclude each other
    log_switch = parser.add_mutually_exclusive_group(required=False)
    log_switch.add_argument('--log-enable', dest='log_enable', action="store_true",
                            help="enable logging. by default, logging is on.")
    log_switch.add_argument('--log-disable', dest='log_enable', action='store_false',
                            help="disable logging. by default, logging is on.")
    parser.set_defaults(log_enable=default_args.log_enable)

    return parser
|
|
|
|
|
|
def parse_cli():
    """
    parse the command line of the running process.

    the parser is created by get_cli_parser() with the defaults of a new Args object.
    arguments which the parser does not know are not rejected
    but returned separately.

    @return: tuple (args, unknown_args).
        args is the Namespace object created by the argument parser,
        unknown_args is the list of command line items that the parser did not recognize.
    """
    cli_parser = get_cli_parser(Args())
    return cli_parser.parse_known_args()
|
|
|
|
|
|
if __name__ == '__main__':
    # calculations are started from a project module, not from this file:
    # when executed directly, just print the usage text and exit successfully.
    get_cli_parser().print_help()
    sys.exit(0)
|