#!/usr/bin/env python """ @package pmsco.pmsco PEARL Multiple-Scattering Calculation and Structural Optimization this is the top-level interface of the PMSCO package. all calculations (any mode, any project) start by calling the run_project() function of this module. the module also provides a command line parser for common options. for parallel execution, prefix the command line with mpi_exec -np NN, where NN is the number of processes to use. note that in parallel mode, one process takes the role of the coordinator (master). the master does not run calculations and is idle most of the time. to benefit from parallel execution on a work station, NN should be the number of processors plus one. on a cluster, the number of processes is chosen according to the available resources. all calculations can also be run in a single process. PMSCO serializes the calculations automatically. the code of the main module is independent of a particular calculation project. all project-specific code must be in a separate python module. the project module must implement a class derived from pmsco.project.Project, and call run_project() with an instance of the project class. refer to the projects folder for examples. @author Matthias Muntwiler, matthias.muntwiler@psi.ch @copyright (c) 2015-18 by Paul Scherrer Institut @n Licensed under the Apache License, Version 2.0 (the "License"); @n you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 """ from __future__ import absolute_import from __future__ import division from __future__ import print_function import argparse from builtins import range import datetime import logging import importlib import os.path import sys from mpi4py import MPI import pmsco.dispatch as dispatch import pmsco.files as files import pmsco.handlers as handlers from pmsco.optimizers import genetic, swarm, grid, table # the module-level logger logger = logging.getLogger(__name__) def setup_logging(enable=False, filename="pmsco.log", level="WARNING"): """ configure the root logger. direct the logs either to a file or the null handler. this function must be called before the first logging command whether a log output is requested or not. to disable logging, call this function with enable=False (default). modules should create their own loggers, by calling @code logger = logging.getLogger(__name__) @endcode at the top of the module code. that logger is then used by calls like @code logger.debug(message) @endcode. @param enable: (bool) True=enable logging to the specified file, False=do not generate a log (null handler). @param filename: (string) path and name of the log file. if this process is part of an MPI communicator, the function inserts a dot and the MPI rank of this process before the extension. @param level: (string) name of the log level. must be the name of one of "DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL". if empty or invalid, the function raises a ValueError. @return None """ numeric_level = getattr(logging, level.upper(), None) if not isinstance(numeric_level, int): raise ValueError('Invalid log level: %s' % level) logger = logging.getLogger("") logger.setLevel(numeric_level) logformat = '%(asctime)s (%(name)s) %(levelname)s: %(message)s' formatter = logging.Formatter(logformat) if enable: mpi_comm = MPI.COMM_WORLD mpi_size = mpi_comm.Get_size() if mpi_size > 1: mpi_rank = mpi_comm.Get_rank() root, ext = os.path.splitext(filename) filename = root + "." + str(mpi_rank) + ext handler = logging.FileHandler(filename, mode="w", delay=True) handler.setLevel(numeric_level) handler.setFormatter(formatter) else: handler = logging.NullHandler() logger.addHandler(handler) def set_common_args(project, args): """ set common project arguments from parsed command line. this function translates and distributes the common arguments from the command line parser to the respective destinations. as of this writing, there are two destinations: the global logger and the project instance. note that run_project() is called with the project instance as the only argument. all project-related arguments from the command line must therefore be copied to the project object. @param args: a namespace object containing the necessary parameters. this can be an instance of Args, or the return value of parse_cli(), or any object which has the same attributes as the Args class. @return: None """ log_file = "pmsco.log" if args.data_dir: project.data_dir = args.data_dir if args.output_file: project.set_output(args.output_file) log_file = args.output_file + ".log" if args.db_file: project.db_file = args.db_file if args.log_file: log_file = args.log_file setup_logging(enable=args.log_enable, filename=log_file, level=args.log_level) logger.debug("creating project") mode = args.mode.lower() if mode in {'single', 'grid', 'swarm', 'genetic', 'table'}: project.mode = mode else: logger.error("invalid optimization mode '%s'.", mode) if args.pop_size: project.optimizer_params['pop_size'] = args.pop_size if args.seed_file: project.optimizer_params['seed_file'] = args.seed_file if args.seed_limit: project.optimizer_params['seed_limit'] = args.seed_limit if args.table_file: project.optimizer_params['table_file'] = args.table_file if args.time_limit: project.set_timedelta_limit(datetime.timedelta(hours=args.time_limit)) if args.keep_files: if "all" in args.keep_files: cats = set([]) else: cats = files.FILE_CATEGORIES - set(args.keep_files) cats -= {'report'} if mode == 'single': cats -= {'model'} project.files.categories_to_delete = cats if args.keep_levels > project.keep_levels: project.keep_levels = args.keep_levels if args.keep_best > project.keep_best: project.keep_best = args.keep_best def run_project(project): """ run a calculation project. @param project: @return: """ # log project arguments only in rank 0 mpi_comm = MPI.COMM_WORLD mpi_rank = mpi_comm.Get_rank() if mpi_rank == 0: project.log_project_args() optimizer_class = None if project.mode == 'single': optimizer_class = handlers.SingleModelHandler elif project.mode == 'grid': optimizer_class = grid.GridSearchHandler elif project.mode == 'swarm': optimizer_class = swarm.ParticleSwarmHandler elif project.mode == 'genetic': optimizer_class = genetic.GeneticOptimizationHandler elif project.mode == 'gradient': logger.error("gradient search not implemented") # TODO: implement gradient search # optimizer_class = gradient.GradientSearchHandler elif project.mode == 'table': optimizer_class = table.TableModelHandler else: logger.error("invalid optimization mode '%s'.", project.mode) project.handler_classes['model'] = optimizer_class project.handler_classes['region'] = handlers.choose_region_handler_class(project) if project and optimizer_class: logger.info("starting calculations") try: dispatch.run_calculations(project) except (SystemExit, KeyboardInterrupt): raise except Exception as __: logger.exception("unhandled exception during calculations.") raise else: logger.info("calculations complete") else: logger.error("undefined project, optimizer, or calculator.") class Args(object): """ arguments of the main function. this class can be used to set up an arguments object for the main function as an alternative to the __main__ function which parses command line arguments. the constructor initializes the attributes with the same default values as the command line parser. """ def __init__(self, mode="single", output_file="pmsco_data"): """ constructor. the parameters are the same as for the command line interface. project and mode are mandatory. other parameters may be required depending on the project and/or the calculation mode. """ self.mode = mode self.pop_size = 0 self.seed_file = "" self.seed_limit = 0 self.data_dir = "" self.output_file = output_file self.db_file = "" self.time_limit = 24.0 self.keep_files = files.FILE_CATEGORIES_TO_KEEP self.keep_best = 10 self.keep_levels = 1 self.log_level = "WARNING" self.log_file = "" self.log_enable = True self.table_file = "" def get_cli_parser(default_args=None): if not default_args: default_args = Args() KEEP_FILES_CHOICES = files.FILE_CATEGORIES | {'all'} parser = argparse.ArgumentParser( formatter_class=argparse.ArgumentDefaultsHelpFormatter, description=""" multiple-scattering calculations and optimization you must call pmsco.py from a project file which defines the calculation project. the project file must be a regular Python module and define: 1) a project class derived from pmsco.project.Project. the class implements/overrides all necessary methods of the calculation project, in particular create_model_space, create_cluster, and create_params. 2) a global function named create_project. the function accepts a namespace object from the argument parser. it may evaluate extra, project-specific arguments. it does not need to evaluate the common parameters described below. the function must return an instance of the project class described above. 3) main code that parses the command line and calls pmsco.pmsco.main_pmsco(). (see the projects folder for examples). """) # the required argument list may depend on the calculation mode. # for simplicity, the parser does not check these requirements. # all parameters are optional and accepted regardless of mode. # errors may occur if implicit requirements are not met. parser.add_argument('project_module', help="path to custom module that defines the calculation project") parser.add_argument('-m', '--mode', default=default_args.mode, choices=['single', 'grid', 'swarm', 'genetic', 'table'], help='calculation mode') parser.add_argument('--pop-size', type=int, default=default_args.pop_size, help='population size (number of particles) in swarm or genetic optimization mode. ' + 'default is the greater of 4 or the number of calculation processes.') parser.add_argument('--seed-file', help='path and name of population seed file. ' + 'population data of previous optimizations can be used to seed a new optimization. ' + 'the file must have the same structure as the .pop or .dat files.') parser.add_argument('--seed-limit', type=int, default=default_args.seed_limit, help='maximum number of models to use from the seed file. ' + 'the models with the best R-factors are selected.') parser.add_argument('-d', '--data-dir', default=default_args.data_dir, help='directory path for experimental data files (if required by project). ' + 'default: working directory') parser.add_argument('-o', '--output-file', default=default_args.output_file, help='base path for intermediate and output files.') parser.add_argument('-b', '--db-file', default=default_args.db_file, help='name of an sqlite3 database file where the results should be stored.') parser.add_argument('--table-file', help='path and name of population table file for table optimization mode. ' + 'the file must have the same structure as the .pop or .dat files.') parser.add_argument('-k', '--keep-files', nargs='*', default=default_args.keep_files, choices=KEEP_FILES_CHOICES, help='output file categories to keep after the calculation. ' 'by default, cluster and model (simulated data) ' 'of a limited number of best models are kept.') parser.add_argument('--keep-best', type=int, default=default_args.keep_best, help='number of best models for which to keep result files ' '(at each node from root down to keep-levels).') parser.add_argument('--keep-levels', type=int, choices=range(5), default=default_args.keep_levels, help='task level down to which result files of best models are kept. ' '0 = model, 1 = scan, 2 = domain, 3 = emitter, 4 = region.') parser.add_argument('-t', '--time-limit', type=float, default=default_args.time_limit, help='wall time limit in hours. the optimizers try to finish before the limit.') parser.add_argument('--log-file', default=default_args.log_file, help='name of the main log file. ' + 'under MPI, the rank of the process is inserted before the extension.') parser.add_argument('--log-level', default=default_args.log_level, help='minimum level of log messages. DEBUG, INFO, WARNING, ERROR, CRITICAL.') feature_parser = parser.add_mutually_exclusive_group(required=False) feature_parser.add_argument('--log-enable', dest='log_enable', action="store_true", help="enable logging. by default, logging is on.") feature_parser.add_argument('--log-disable', dest='log_enable', action='store_false', help="disable logging. by default, logging is on.") parser.set_defaults(log_enable=default_args.log_enable) return parser def parse_cli(): """ parse the command line interface @return: Namespace object created by the argument parser. """ default_args = Args() parser = get_cli_parser(default_args) args, unknown_args = parser.parse_known_args() return args, unknown_args def import_project_module(path): """ import the custom project module. imports the project module given its file path. the path is expanded to its absolute form and appended to the python path. @param path: path and name of the module to be loaded. path is optional and defaults to the python path. if the name includes an extension, it is stripped off. @return: the loaded module as a python object """ path, name = os.path.split(path) name, __ = os.path.splitext(name) path = os.path.abspath(path) sys.path.append(path) project_module = importlib.import_module(name) return project_module def main(): args, unknown_args = parse_cli() if args: module = import_project_module(args.project_module) try: project_args = module.parse_project_args(unknown_args) except NameError: project_args = None project = module.create_project() set_common_args(project, args) try: module.set_project_args(project, project_args) except NameError: pass run_project(project) if __name__ == '__main__': main() sys.exit(0)