493 lines
18 KiB
Python
Executable File
493 lines
18 KiB
Python
Executable File
#!/usr/bin/env python
|
|
|
|
"""
|
|
@package pmsco.pmsco
|
|
PEARL Multiple-Scattering Calculation and Structural Optimization
|
|
|
|
this is the top-level interface of the PMSCO package.
|
|
all calculations (any mode, any project) start by calling the run_project() function of this module.
|
|
the module also provides a command line and a run-file/run-dict interface.
|
|
|
|
for parallel execution, prefix the command line with mpi_exec -np NN, where NN is the number of processes to use.
|
|
note that in parallel mode, one process takes the role of the coordinator (master).
|
|
the master does not run calculations and is idle most of the time.
|
|
to benefit from parallel execution on a work station, NN should be the number of processors.
|
|
on a cluster, the number of processes is chosen according to the available resources.
|
|
|
|
all calculations can also be run in a single process.
|
|
PMSCO serializes the calculations automatically.
|
|
|
|
the code of the main module is independent of a particular calculation project.
|
|
all project-specific code must be in a separate python module.
|
|
the project module must implement a class derived from pmsco.project.Project,
|
|
and call run_project() with an instance of the project class.
|
|
refer to the projects folder for examples.
|
|
|
|
@author Matthias Muntwiler, matthias.muntwiler@psi.ch
|
|
|
|
@copyright (c) 2015-21 by Paul Scherrer Institut @n
|
|
Licensed under the Apache License, Version 2.0 (the "License"); @n
|
|
you may not use this file except in compliance with the License.
|
|
You may obtain a copy of the License at
|
|
http://www.apache.org/licenses/LICENSE-2.0
|
|
"""
|
|
|
|
import argparse
|
|
from builtins import range
|
|
import logging
|
|
import importlib
|
|
import commentjson as json
|
|
from pathlib import Path
|
|
import sys
|
|
|
|
# optional MPI support.
# if mpi4py can be imported, the world communicator, its size and the rank of this process
# are published as module globals.
# otherwise, the globals describe a single, serial process.
try:
    from mpi4py import MPI
    mpi_comm = MPI.COMM_WORLD
    mpi_size, mpi_rank = mpi_comm.Get_size(), mpi_comm.Get_rank()
except ImportError:
    MPI = mpi_comm = None
    mpi_size, mpi_rank = 1, 0
|
|
|
|
# directory two levels above this file.
# it is put at the front of the python path (once) so that the pmsco package can be imported below.
pmsco_root = Path(__file__).resolve().parent.parent
if sys.path.count(str(pmsco_root)) == 0:
    sys.path.insert(0, str(pmsco_root))
|
|
|
|
import pmsco.dispatch as dispatch
|
|
import pmsco.files as files
|
|
import pmsco.handlers as handlers
|
|
from pmsco.optimizers import genetic, swarm, grid, table
|
|
|
|
# the module-level logger, named after this module.
# no handler is attached here; setup_logging() installs the handler on the root logger.
logger = logging.getLogger(__name__)
|
|
|
|
|
|
def setup_logging(enable=False, filename="pmsco.log", level="WARNING"):
    """
    configure the root logger. direct the logs either to a file or the null handler.

    this function must be called before the first logging command
    whether a log output is requested or not.
    to disable logging, call this function with enable=False (default).

    modules should create their own loggers, by calling
    @code logger = logging.getLogger(__name__) @endcode
    at the top of the module code.
    that logger is then used by calls like
    @code logger.debug(message) @endcode.

    @note each call adds one handler to the root logger.
        handlers from earlier calls are not removed.

    @param enable: (bool) True=enable logging to the specified file,
        False=do not generate a log (null handler).
    @param filename: (Path-like) path and name of the log file.
        if this process is part of an MPI communicator with more than one process,
        the function inserts a dot and the MPI rank of this process before the extension.
        if the filename is empty (empty string, None, or a path without a file name),
        logging is disabled.
    @param level: (string) name of the log level.
        must be the name of one of "DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL".
        the name is not case-sensitive.
        if empty or None, logging is disabled.
        if not a valid level, defaults to "WARNING".
    @return None
    """
    log_path = Path(filename or "")
    level_name = str(level or "")

    # test the name component rather than str(path): str(Path("")) is ".", which is truthy.
    enable = bool(enable and log_path.name and level_name)

    # accept integer level constants only; any other attribute of the logging module
    # that happens to match the name must not end up in setLevel().
    numeric_level = getattr(logging, level_name.upper(), None)
    if not isinstance(numeric_level, int):
        numeric_level = logging.WARNING

    root_logger = logging.getLogger()
    root_logger.setLevel(numeric_level)

    if enable:
        if mpi_size > 1:
            # one log file per process: name.ext -> name.<rank>.ext
            log_path = log_path.with_suffix(f".{mpi_rank}" + log_path.suffix)

        log_format = '%(asctime)s (%(name)s) %(levelname)s: %(message)s'
        formatter = logging.Formatter(log_format)

        # delay=True: the file is not opened until the first record is emitted.
        handler = logging.FileHandler(log_path, mode="w", delay=True)
        handler.setLevel(numeric_level)
        handler.setFormatter(formatter)
    else:
        handler = logging.NullHandler()

    root_logger.addHandler(handler)
|
|
|
|
|
|
def set_common_args(project, args):
    """
    copy the common command line arguments to the project.

    every argument that has a non-empty (truthy) value overrides the corresponding project attribute.
    arguments without a value leave the project attribute untouched.

    special cases:
    - if log_enable is False, the log_file and log_level attributes of the project are cleared.
    - mode is converted to lower case.
    - keep_levels and keep_best can only raise the value that is already set in the project.

    note that run_project() is called with the project instance as the only argument.
    all project-related arguments from the command line must therefore be copied to the project object.

    @param project: project object that receives the argument values.

    @param args: a namespace object containing the necessary parameters.
        this can be the namespace returned by parse_cli(),
        or any object which has the same attributes.

    @return: None
    """

    # plain values: copied if given
    for name in ("data_dir", "output_file", "db_file", "log_file", "log_level"):
        value = getattr(args, name)
        if value:
            setattr(project, name, value)

    # disabling the log wipes whatever log settings are in the project at this point
    if not args.log_enable:
        project.log_file = project.log_level = ""

    if args.mode:
        project.mode = args.mode.lower()

    for name in ("time_limit", "keep_files"):
        value = getattr(args, name)
        if value:
            setattr(project, name, value)

    # lower bounds: the project keeps its own value if that is larger
    for name in ("keep_levels", "keep_best"):
        requested = getattr(args, name)
        if requested:
            setattr(project, name, max(requested, getattr(project, name)))
|
|
|
|
|
|
def run_project(project):
    """
    run a calculation project.

    the function sets up logging, validates the project, chooses the handler classes,
    and passes control to the pmsco.dispatch module to run the calculations.

    the log file is project.log_file.
    if that is empty, the name is derived from project.job_name with a .log extension.
    if neither yields a file name, or if project.log_level is empty, logging is disabled.
    missing parent directories of the log file are created.

    if the project mode does not map to an optimizer class,
    an error is logged and the function returns without running calculations.

    @param project: fully initialized project object.
        the validate method is called as part of this function after setting up the logger.
    @return: None
    """

    log_file = Path(project.log_file)
    if not log_file.name:
        # with_suffix raises ValueError on a path without a name,
        # so check the name before deriving the log file from the job name.
        job_path = Path(project.job_name)
        if job_path.name:
            log_file = job_path.with_suffix(".log")
    if log_file.name:
        # parents=True: the log may go to a nested directory that does not exist yet.
        # exist_ok=True: under MPI several processes may try to create the directory.
        log_file.parent.mkdir(parents=True, exist_ok=True)
        log_level = project.log_level
    else:
        log_level = ""
    setup_logging(enable=bool(log_level), filename=log_file, level=log_level)
    if mpi_rank == 0:
        project.log_project_args()

    project.validate()

    optimizer_class = None
    if project.mode == 'single':
        optimizer_class = handlers.SingleModelHandler
    elif project.mode == 'grid':
        optimizer_class = grid.GridSearchHandler
    elif project.mode == 'swarm':
        optimizer_class = swarm.ParticleSwarmHandler
    elif project.mode == 'genetic':
        optimizer_class = genetic.GeneticOptimizationHandler
    elif project.mode == 'gradient':
        logger.error("gradient search not implemented")
        # TODO: implement gradient search
        # optimizer_class = gradient.GradientSearchHandler
    elif project.mode == 'table':
        optimizer_class = table.TableModelHandler
    else:
        logger.error("invalid optimization mode '%s'.", project.mode)
    project.handler_classes['model'] = optimizer_class

    project.handler_classes['region'] = handlers.choose_region_handler_class(project)

    if project and optimizer_class:
        logger.info("starting calculations")
        try:
            dispatch.run_calculations(project)
        except Exception:
            # SystemExit and KeyboardInterrupt do not derive from Exception.
            # they pass through without being logged.
            logger.exception("unhandled exception during calculations.")
            raise
        else:
            logger.info("calculations complete")
    else:
        logger.error("undefined project, optimizer, or calculator.")
|
|
|
|
|
|
def schedule_project(project, run_dict):
    """
    submit a calculation project to a job scheduler instead of running it.

    the function validates the project,
    instantiates the schedule class named in the 'schedule' section of the run file,
    hands it the schedule properties and the complete run dictionary,
    and finally validates and submits the schedule.

    logging is switched off (null handler) in this function.
    the function must run in the process with MPI rank 0.

    @param project: fully initialized project object.
        the validate method is called as part of this function.

    @param run_dict: dictionary holding the contents of the run file.
        the 'schedule' item must contain the '__module__' and '__class__' keys
        which identify the schedule class.

    @return: None
    """
    assert mpi_rank == 0
    setup_logging(enable=False)

    project.validate()

    schedule_params = run_dict['schedule']
    schedule_module = importlib.import_module(schedule_params['__module__'])
    schedule_factory = getattr(schedule_module, schedule_params['__class__'])

    job = schedule_factory(project)
    job.set_properties(schedule_module, schedule_params, project)
    job.run_dict = run_dict
    job.validate()
    job.submit()
|
|
|
|
|
|
class Args(object):
    """
    arguments of the main function.

    this class can be used to set up an arguments object
    as an alternative to parsing the command line.
    an instance has all attributes that set_common_args() reads.
    """

    def __init__(self):
        """
        constructor.

        the attributes have the same names as the common command line arguments.

        attributes which are initialized to an empty value (data_dir, output_file, db_file, log_file, mode)
        leave the corresponding project attribute unchanged when passed to set_common_args().
        the other attributes override the project (time_limit, keep_files, log_level)
        or set a lower bound (keep_best, keep_levels).

        other parameters may be required depending on the project
        and/or the calculation mode.
        """
        # set_common_args() reads args.mode unconditionally.
        # the empty string keeps the mode of the project.
        self.mode = ""
        self.data_dir = ""
        self.output_file = ""
        self.db_file = ""
        self.time_limit = 24.0
        self.keep_files = files.FILE_CATEGORIES_TO_KEEP
        self.keep_best = 10
        self.keep_levels = 1
        self.log_level = "WARNING"
        self.log_file = ""
        self.log_enable = True
|
|
|
|
|
|
def get_cli_parser():
    """
    create the parser of the common command line arguments.

    all arguments are optional and accepted regardless of the calculation mode.
    arguments that are not given on the command line are None in the parsed namespace,
    except log_enable which defaults to True.

    @return: (argparse.ArgumentParser) parser object.
        project-specific arguments are not declared.
        use parse_known_args to separate them from the common arguments.
    """
    # sorted list rather than a set: the order of the choices shown in help and error messages
    # would otherwise change from run to run.
    keep_files_choices = sorted(files.FILE_CATEGORIES | {'all'})

    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        description="""
        multiple-scattering calculations and optimization

        you must call pmsco.py from a project file which defines the calculation project.
        the project file must be a regular Python module and define:

        1) a project class derived from pmsco.project.Project.
        the class implements/overrides all necessary methods of the calculation project,
        in particular create_model_space, create_cluster, and create_params.

        2) a global function named create_project.
        the function accepts a namespace object from the argument parser.
        it may evaluate extra, project-specific arguments.
        it does not need to evaluate the common parameters described below.
        the function must return an instance of the project class described above.

        3) main code that parses the command line and calls pmsco.pmsco.main_pmsco().
        (see the projects folder for examples).
        """)
    # the required argument list may depend on the calculation mode.
    # for simplicity, the parser does not check these requirements.
    # all parameters are optional and accepted regardless of mode.
    # errors may occur if implicit requirements are not met.
    parser.add_argument('project_module', nargs='?',
                        help="path to custom module that defines the calculation project")
    parser.add_argument('-r', '--run-file',
                        help="path to run-time parameters file which contains all program arguments. " +
                             "must be in JSON format.")
    parser.add_argument('-m', '--mode',
                        choices=['single', 'grid', 'swarm', 'genetic', 'table'],
                        help='calculation mode')
    parser.add_argument('-d', '--data-dir',
                        help='directory path for experimental data files (if required by project). ' +
                             'default: working directory')
    parser.add_argument('-o', '--output-file',
                        help='base path for intermediate and output files.')
    parser.add_argument('-b', '--db-file',
                        help='name of an sqlite3 database file where the results should be stored.')
    parser.add_argument('-k', '--keep-files', nargs='*',
                        choices=keep_files_choices,
                        help='output file categories to keep after the calculation. '
                             'by default, cluster and model (simulated data) '
                             'of a limited number of best models are kept.')
    parser.add_argument('--keep-best', type=int,
                        help='number of best models for which to keep result files '
                             '(at each node from root down to keep-levels).')
    parser.add_argument('--keep-levels', type=int, choices=range(5),
                        help='task level down to which result files of best models are kept. '
                             '0 = model, 1 = scan, 2 = domain, 3 = emitter, 4 = region.')
    parser.add_argument('-t', '--time-limit', type=float,
                        help='wall time limit in hours. the optimizers try to finish before the limit.')
    parser.add_argument('--log-file',
                        help='name of the main log file. ' +
                             'under MPI, the rank of the process is inserted before the extension.')
    parser.add_argument('--log-level',
                        help='minimum level of log messages. DEBUG, INFO, WARNING, ERROR, CRITICAL.')

    # --log-enable and --log-disable write to the same destination and exclude each other.
    feature_parser = parser.add_mutually_exclusive_group(required=False)
    feature_parser.add_argument('--log-enable', dest='log_enable', action="store_true",
                                help="enable logging. by default, logging is on.")
    feature_parser.add_argument('--log-disable', dest='log_enable', action='store_false',
                                help="disable logging. by default, logging is on.")
    parser.set_defaults(log_enable=True)

    return parser
|
|
|
|
|
|
def parse_cli():
    """
    parse the command line of the running process.

    the common arguments declared by get_cli_parser() are parsed into a namespace.
    arguments which the parser does not know are not an error;
    they are returned separately so that a project module can parse them.

    @return: tuple (args, unknown_args).
        args is the Namespace object created by the argument parser.
        unknown_args is the list of remaining, unparsed argument strings.
    """
    return get_cli_parser().parse_known_args()
|
|
|
|
|
|
def import_module(module_name):
    """
    import a custom module by name.

    import a module given its file path or module name (like in an import statement).

    preferably, the module name should be given as in an import statement.
    as the top-level pmsco directory is on the python path,
    the module name will begin with `projects` for a custom project module or `pmsco` for a core pmsco module.
    in this case, the function just calls importlib.import_module.

    if a file path is given, i.e., `module_name` links to an existing file and has a `.py` extension,
    the function extracts the directory path,
    inserts it into the python path,
    and calls importlib.import_module on the stem of the file name.

    @note the file path remains in the python path.
        this option should be used carefully to avoid breaking file name resolution.

    @param module_name: (str or Path-like) file path or module name.
        file path is interpreted relative to the working directory.

    @return: the loaded module as a python object

    @raise ImportError (from importlib) if the module cannot be imported.
    """
    p = Path(module_name)
    if p.is_file() and p.suffix == ".py":
        # sys.path must hold strings: the import system ignores entries of other types,
        # and a Path object never compares equal to the strings already in the list.
        path = str(p.parent.resolve())
        module_name = p.stem
        if path not in sys.path:
            sys.path.insert(0, path)

    return importlib.import_module(module_name)
|
|
|
|
|
|
def main_dict(run_params):
    """
    main function with dictionary run-time parameters

    this starts the whole process with all direct parameters.
    the command line is not parsed.
    no run-file is loaded (just the project module).

    the project module is loaded by import_module().
    like in a run-file processed by main(),
    the module can be given as a module name or as a path to a .py file.

    if the 'project' item contains a '__class__' key, that class is looked up in the module and instantiated.
    otherwise, the module's create_project function is called.

    @param run_params: dictionary with the same structure as the JSON run-file.
        the 'project' item is required and must contain the '__module__' key.
        a 'schedule' item is not evaluated by this function.

    @return: None
    """
    project_params = run_params['project']

    # same loader as in main() so that run-dict and run-file accept the same module specification.
    module = import_module(project_params['__module__'])
    try:
        project_class = getattr(module, project_params['__class__'])
    except KeyError:
        # no class name in the parameters: let the module create the project.
        project = module.create_project()
    else:
        project = project_class()

    project._module = module
    project.directories['pmsco'] = Path(__file__).parent
    project.directories['project'] = Path(module.__file__).parent
    project.set_properties(module, project_params, project)
    run_project(project)
|
|
|
|
|
|
def main():
    """
    main function with command line parsing

    this function starts the whole process with parameters from the command line.

    if the command line contains a run-file parameter, it determines the module to load and the project parameters.
    otherwise, the project module given on the command line is loaded.
    if neither specifies a project module, the function exits with a usage error.

    the project class can be specified either in the run-file or the project module.
    if the run-file specifies a class name, that class is looked up in the project module and instantiated.
    otherwise, the module's create_project is called.

    the project module may define two optional functions for project-specific command line arguments:
    parse_project_args(unknown_args) and set_project_args(project, project_args).
    they are used only if the project is created by create_project.

    if the run-file contains a 'schedule' section with a true 'enabled' item,
    the project is submitted to the scheduler, else it is run immediately.

    @return: None
    """
    args, unknown_args = parse_cli()

    # argparse always defines run_file and sets it to None if the option is not given.
    # test the value explicitly: open(None) raises TypeError, not AttributeError.
    if args.run_file:
        with open(args.run_file, 'r') as f:
            rf = json.load(f)
        rfp = rf['project']
    else:
        # empty run dictionary so that the schedule lookup below has something to work on.
        rf = {}
        rfp = {'__module__': args.project_module}

    if not rfp.get('__module__'):
        get_cli_parser().error("no project module specified. "
                               "give a project module or a run-file on the command line.")

    module = import_module(rfp['__module__'])

    # optional hook of the project module.
    # look up the function first so that errors raised inside the hook are not mistaken for a missing hook.
    parse_project_args = getattr(module, 'parse_project_args', None)
    if parse_project_args is not None:
        project_args = parse_project_args(unknown_args)
    else:
        project_args = None

    try:
        project_class = getattr(module, rfp['__class__'])
    except (AttributeError, KeyError):
        project = module.create_project()
    else:
        project = project_class()
        # a project created from the run-file does not take project-specific command line arguments.
        project_args = None

    project._module = module
    project.directories['pmsco'] = Path(__file__).parent
    project.directories['project'] = Path(module.__file__).parent
    project.set_properties(module, rfp, project)

    set_common_args(project, args)
    if project_args:
        set_project_args = getattr(module, 'set_project_args', None)
        if set_project_args is not None:
            set_project_args(project, project_args)

    try:
        schedule_enabled = rf['schedule']['enabled']
    except KeyError:
        schedule_enabled = False
    if schedule_enabled:
        schedule_project(project, rf)
    else:
        run_project(project)
|
|
|
|
|
|
# script entry point: run the command line interface and report success to the shell.
if __name__ == '__main__':
    main()
    raise SystemExit(0)
|