482 lines
17 KiB
Python
482 lines
17 KiB
Python
#!/usr/bin/env python3
|
|
|
|
"""
|
|
@package pmsco.pmsco
|
|
PSI Multiple-Scattering Calculation and Structural Optimization
|
|
|
|
This is the top-level interface of the PMSCO package.
|
|
All calculations (any mode, any project) start by calling the run_project function of this module.
|
|
The module also provides a command line, a run-file, and a run-dict interface.
|
|
They all, in one way or another, set up an instance of a Project class and call the run_project function.
|
|
|
|
For parallel execution, prefix the command line with mpi_exec -np NN, where NN is the number of processes to use.
|
|
Note that in parallel mode, one process takes the role of the coordinator (master).
|
|
The master does not run calculations and is idle most of the time.
|
|
To benefit from parallel execution on a work station, NN should be the number of processors.
|
|
On a cluster, the number of processes should be chosen according to the available resources.
|
|
|
|
All calculations can also be run in a single process.
|
|
PMSCO serializes the calculations automatically.
|
|
|
|
The code of the main module is independent of a particular calculation project.
|
|
All project-specific code must be in a separate python module.
|
|
The project module must implement a class derived from pmsco.project.Project.
|
|
The project module and class must be referenced in the run-file, or passed to the suitable run-function.
|
|
|
|
While they are not strictly necessary, run-files help to separate code and data.
|
|
Code is usually version-controlled, run-files contain metadata of calculations and should be kept with the results.
|
|
A git hash can be used to refer to the code used to execute the calculation.
|
|
|
|
@author Matthias Muntwiler, matthias.muntwiler@psi.ch
|
|
|
|
@copyright (c) 2015-23 by Paul Scherrer Institut @n
|
|
Licensed under the Apache License, Version 2.0 (the "License"); @n
|
|
you may not use this file except in compliance with the License.
|
|
You may obtain a copy of the License at
|
|
http://www.apache.org/licenses/LICENSE-2.0
|
|
"""
|
|
|
|
import argparse
|
|
from collections.abc import Mapping
|
|
import importlib
|
|
import importlib.util
|
|
import json
|
|
import jsonschema
|
|
import logging
|
|
import os
|
|
from pathlib import Path
|
|
import sys
|
|
import typing
|
|
|
|
# Detect the MPI environment.
# If mpi4py cannot be imported, fall back to single-process defaults.
try:
    from mpi4py import MPI
    mpi_comm = MPI.COMM_WORLD
    mpi_size = mpi_comm.Get_size()
    mpi_rank = mpi_comm.Get_rank()
except ImportError:
    MPI = mpi_comm = None
    mpi_size, mpi_rank = 1, 0

# Put the directory two levels above this file at the front of the module search path
# (unless it is listed already), so that the pmsco imports below can be resolved.
pmsco_root = Path(__file__).resolve().parent.parent
if sys.path.count(str(pmsco_root)) == 0:
    sys.path.insert(0, str(pmsco_root))
|
|
|
|
from pmsco.database.git import get_git_hash
|
|
import pmsco.dispatch as dispatch
|
|
from pmsco.project import Project
|
|
|
|
# the module-level logger
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
def setup_logging(enable=False, filename="pmsco.log", level="WARNING"):
    """
    configure the root logger. direct the logs either to a file or the null handler.

    this function must be called before the first logging command
    whether a log output is requested or not.
    to disable logging, call this function with enable=False (default).

    modules should create their own loggers, by calling
    @code logger = logging.getLogger(__name__) @endcode
    at the top of the module code.
    that logger is then used by calls like
    @code logger.debug(message) @endcode.

    note that each call adds one more handler to the root logger.

    @param enable: (bool) True=enable logging to the specified file,
        False=do not generate a log (null handler).
    @param filename: (Path-like) path and name of the log file.
        if this process is part of an MPI communicator,
        the function inserts a dot and the MPI rank of this process before the extension.
        if the filename is empty or None, logging is disabled.
    @param level: (string) name of the log level.
        must be the name of one of "DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL".
        the name is not case-sensitive.
        if empty or None, logging is disabled.
        if not a valid level, defaults to "WARNING".
    @return None
    """
    # None is treated like an empty string so that the .upper() lookup below cannot fail.
    level_name = "" if level is None else str(level)
    enable = enable and filename is not None and str(filename) and level_name

    # the logging module also has upper-case attributes which are not levels (e.g. BASIC_FORMAT).
    # only integer constants are accepted, everything else falls back to WARNING.
    numeric_level = getattr(logging, level_name.upper(), None)
    if not isinstance(numeric_level, int) or isinstance(numeric_level, bool):
        numeric_level = logging.WARNING

    root_logger = logging.getLogger()
    root_logger.setLevel(numeric_level)
    # matplotlib is capped at WARNING regardless of the requested level.
    logging.getLogger('matplotlib').setLevel(logging.WARNING)

    if enable:
        if mpi_size > 1:
            # one log file per process: name.log -> name.<rank>.log
            p = Path(filename)
            filename = p.with_suffix(f".{mpi_rank}" + p.suffix)

        log_format = '%(asctime)s (%(name)s) %(levelname)s: %(message)s'
        formatter = logging.Formatter(log_format)

        # delay=True: the file is not opened before the first record is emitted.
        handler = logging.FileHandler(filename, mode="w", delay=True)
        handler.setLevel(numeric_level)
        handler.setFormatter(formatter)
    else:
        handler = logging.NullHandler()

    root_logger.addHandler(handler)
|
|
|
|
|
|
def run_project(project):
    """
    run a calculation project.

    the function sets up logging, validates the project, chooses the handler classes,
    and passes control to the pmsco.dispatch module to run the calculations.

    the log file is taken from project.log_file.
    if that is empty, the name is derived from project.job_name with a ".log" extension.
    if both are empty, or if project.log_level is empty, no log file is written.
    missing parent directories of the log file are created.

    @param project: fully initialized project object.
        the validate method is called as part of this function after setting up the logger.
    @return: None
    @raise Exception: any exception raised by the calculations is logged and re-raised.
    """

    log_file = Path(project.log_file or "")
    # Path("").with_suffix() raises ValueError, so the job name is used only if it is not empty.
    if not log_file.name and project.job_name:
        log_file = Path(project.job_name).with_suffix(".log")
    if log_file.name:
        log_file.parent.mkdir(parents=True, exist_ok=True)
        log_level = project.log_level or ""
    else:
        log_level = ""
    setup_logging(enable=bool(log_level), filename=log_file, level=log_level)

    # only the first process reports the project arguments.
    if mpi_rank == 0:
        project.log_project_args()

    if not project.git_hash:
        project.git_hash = get_git_hash()

    project.validate()

    if not project:
        logger.error("undefined project, optimizer, or calculator.")
        return

    logger.info("starting calculations")
    try:
        dispatch.run_calculations(project)
    except Exception:
        # SystemExit and KeyboardInterrupt do not derive from Exception and pass through unlogged.
        logger.exception("unhandled exception during calculations.")
        raise
    else:
        logger.info("calculations complete")
|
|
|
|
|
|
def schedule_project(project, run_dict):
    """
    schedule a calculation project.

    the function validates the project and submits a job to the scheduler.

    placeholders in run-file's directories dict are resolved:
    each entry is replaced by the corresponding entry of project.directories.
    entries which the project does not know are left unchanged.

    the git hash, database file and seed file of the project are copied into the run-dict
    if they are defined.
    the modified run-dict is handed to the schedule object.

    @param project: fully initialized project object.
        the validate method is called as part of this function.

    @param run_dict: dictionary holding the contents of the run file.
        the dictionary is modified in place.
        the `schedule` section must contain the `__module__` and `__class__` entries
        of the schedule class.

    @return: None
    """
    assert mpi_rank == 0
    setup_logging(enable=False)

    project.validate()

    project_section = run_dict.setdefault('project', {})

    # resolve each directory on its own so that one unknown key
    # does not prevent the resolution of the remaining ones.
    dirs = project_section.get('directories') or {}
    for k in list(dirs):
        try:
            dirs[k] = str(project.directories[k])
        except KeyError:
            continue

    if project.git_hash:
        project_section['git_hash'] = project.git_hash
    elif hsh := get_git_hash():
        project_section['git_hash'] = hsh
    if project.db_file:
        project_section['db_file'] = str(project.db_file)

    # the seed file is optional in the project as well as in the run-file.
    try:
        sf = project.optimizer_params['seed_file']
    except (KeyError, TypeError):
        sf = None
    if sf:
        project_section.setdefault('optimizer_params', {})['seed_file'] = str(sf)

    schedule_dict = run_dict['schedule']
    module = _load_module(schedule_dict['__module__'])
    schedule_class = getattr(module, schedule_dict['__class__'])
    schedule = schedule_class(project)
    schedule.set_properties(vars(module), schedule_dict, project)
    schedule.run_dict = run_dict
    schedule.validate()
    schedule.submit()
|
|
|
|
|
|
def _load_runfile(runfile: typing.Union[typing.Dict, str, bytes, os.PathLike, typing.TextIO]) -> typing.Mapping:
    """
    Load a runfile

    The function loads a runfile from a dictionary, an open json file object, or a json file specified by a file path.
    If the source is a file, the directory is added to the project directories under the `run` key.
    The `directories` section is created if the runfile does not contain one.

    The contents are validated against `schema/runfile.schema.json` next to this module.

    @param runfile: Dictionary with contents of a runfile, an open file object, or a path-like.
    @return: Dictionary with the contents of the runfile.
    @raise jsonschema.ValidationError if the contents do not conform to the schema.
    """

    def set_run_dir(contents, fileobj):
        # best effort: file objects without a usable name (e.g. StringIO, file descriptors)
        # or with a non-existing directory do not define a run directory.
        try:
            run_dir = Path(fileobj.name).parent.resolve(True)
        except (AttributeError, TypeError, FileNotFoundError):
            return
        # a malformed or missing project section is left to the schema validation.
        try:
            contents['project'].setdefault('directories', {})['run'] = run_dir
        except (KeyError, TypeError, AttributeError):
            pass

    if isinstance(runfile, Mapping):
        rf = runfile
    elif hasattr(runfile, 'read'):
        rf = json.load(runfile)
        set_run_dir(rf, runfile)
    else:
        with open(runfile, 'r', encoding="utf-8") as f:
            rf = json.load(f)
            set_run_dir(rf, f)

    schema_dir = Path(__file__).resolve().parent / "schema"
    schema_file = schema_dir / "runfile.schema.json"
    # as_uri produces a well-formed file URI on all platforms (drive letters, escaped characters).
    # the trailing slash makes relative references resolve inside the schema directory.
    schema_url = schema_dir.as_uri() + "/"
    with open(schema_file, encoding="utf-8") as f:
        schema = json.load(f)

    # NOTE(review): RefResolver is deprecated in recent jsonschema releases.
    # it is kept here to avoid a new dependency.
    resolver = jsonschema.RefResolver(schema_url, None)
    jsonschema.validate(rf, schema, resolver=resolver)

    return rf
|
|
|
|
|
|
def _load_module(name_or_path: typing.Union[str, bytes, os.PathLike]):
    """
    Load a Python module

    The function first tries to import the argument as a module name.
    If that fails, the argument is interpreted as the path of a module file.
    In that case the module is registered in sys.modules under the stem of the file name.

    @param name_or_path: Module name or file path of the module.
        If a module name is given, the module must be in the Python module search path.
        Path-like objects and bytes are accepted for file paths.
    @return: module
    @raise ValueError if the module is not found
    """

    target = os.fsdecode(name_or_path)

    # import_module rejects names with a leading dot (relative import without package)
    # with a TypeError. such names can only be file paths.
    import_error = None
    if target and not target.startswith("."):
        try:
            return importlib.import_module(target)
        except ImportError as err:
            import_error = err

    module_name = Path(target).stem
    spec = importlib.util.spec_from_file_location(module_name, target)
    # spec_from_file_location does not check whether the file exists.
    if spec is None or spec.loader is None or not Path(target).is_file():
        msg = f"Can't find module {target}"
        print(msg, file=sys.stderr)
        print("sys.path:", sys.path, file=sys.stderr)
        raise ValueError(msg) from import_error

    module = importlib.util.module_from_spec(spec)

    # the module must be registered before execution.
    # if execution fails, the previous state of sys.modules is restored
    # so that no half-initialized module remains importable.
    previous = sys.modules.get(module_name)
    sys.modules[module_name] = module
    try:
        spec.loader.exec_module(module)
    except BaseException:
        if previous is None:
            sys.modules.pop(module_name, None)
        else:
            sys.modules[module_name] = previous
        raise

    return module
|
|
|
|
|
|
def main_project(symbols: typing.Optional[typing.Dict[str, typing.Any]] = None,
                 project: typing.Optional[Project] = None,
                 project_module: typing.Optional[typing.Union[str, os.PathLike]] = None,
                 project_class: typing.Optional[typing.Union[str, typing.Type[Project]]] = None,
                 runfile: typing.Union[typing.Dict, str, bytes, os.PathLike, typing.TextIO] = None):

    """
    Main function with optional arguments.

    This function starts the whole process based on function arguments.
    The arguments can be an existing project instance, a project class, and/or a runfile.

    The function carries out the following steps:

    1. Load a runfile - if specified.
    2. Create a project object.
    3. Apply the runfile to the project.
    4. Run or schedule the project.

    The project instance is produced from the first match of the following conditions:

    1. `project` argument is a Project instance.
    2. `project_class` is a Project class.
    3. `__class__` entry from runfile.
       The class must be listed in symbols,
       or the runfile must also contain a `__module__` entry
       with the name or file path of the project module that declares the class.

    The project is scheduled rather than executed if the corresponding section in the runfile is present
    and its `enabled` entry is true.

    @param symbols: Namespace of the project module, which contains project, cluster and calculator classes.
        This is the basis for class resolution from runfiles.
        If called by the project module, it should pass vars().
    @param project: project instance.
    @param project_class: project class or name of a project class defined in `symbols`.
    @param project_module: name or file path of the project module.
        This is required if symbols is not defined
        and the project class is given as a string (project_class argument or runfile value).
    @param runfile: A file-like, path-like or dict with runfile contents.
        Runfiles must be in json-format.
    @return: None
    @raise ValueError if the project class or module cannot be determined because no runfile is given.
    """

    if runfile is not None:
        rf = _load_runfile(runfile)
        rfp = rf['project']
    else:
        rf = None
        rfp = None

    # the project module is known only if it is loaded by this function.
    module = None

    if project is None:
        # issubclass raises a TypeError on strings, so class names must be detected first.
        is_project_class = isinstance(project_class, type) and issubclass(project_class, Project)
        if not is_project_class:
            project_classname = project_class
            if not project_classname:
                if rfp is None:
                    raise ValueError("project class undefined: specify project, project_class or runfile")
                project_classname = rfp['__class__']

            if not symbols:
                if not project_module:
                    if rfp is None:
                        raise ValueError("project module undefined: specify symbols, project_module or runfile")
                    project_module = rfp['__module__']
                module = _load_module(project_module)
                symbols = vars(module)

            project_class = symbols[project_classname]

        project = project_class()

    project.directories['pmsco'] = Path(__file__).parent
    # the project directory is set only if the module was loaded here and has a file location.
    module_file = getattr(module, '__file__', None)
    if module_file:
        project.directories['project'] = Path(module_file).parent

    if rfp:
        project.set_properties(symbols, rfp, project)

    # TypeError covers the case that no runfile is given (rf is None).
    try:
        schedule_enabled = rf['schedule']['enabled']
    except (KeyError, TypeError):
        schedule_enabled = False
    if schedule_enabled:
        schedule_project(project, rf)
    else:
        run_project(project)
|
|
|
|
|
|
def get_cli_parser():
    """
    build the argument parser of the command line interface.

    the parser declares the options -r/--run-file, -m/--module, -c/--project-class,
    -o/--output-dir and -j/--job-name.
    all of them take one string value and default to None.
    none of them is declared as required by the parser.

    @return: argparse.ArgumentParser object.
    """
    description = """
    PSI multiple-scattering calculations and optimization (PMSCO)

    This is the main command line entry point for PMSCO calculation jobs.
    Alternative entry points can be provided by project modules.
    The command line requires at least a run-file to define the project parameters.

    The command can run a calculation job directly or submit it to a job queue
    via the `schedule` section in the run-file.
    The program detects whether it runs in a single-process or OpenMPI multi-process environment
    and coordinates parallel processes automatically.

    All arguments should preferably be declared in the run-file.
    A small number of options can be passed on the command line
    to override the corresponding parameter of the run-file.

    Please see the documentation that is compiled in docs/html/index.html
    for instructions how to set up a project module and run-files.
    See also the projects folder for examples.
    """

    # (option flags, help text) in the order in which they appear in the help output
    option_table = (
        (('-r', '--run-file'),
         "Path to a run-file in JSON format which contains all calculation parameters. "
         "This argument is mandatory. "),
        (('-m', '--module'),
         "File name of the custom project module. "
         "The module must declare the project class and other project-specific classes. "
         "This optional argument overrides the __module__ entry of the run-file. "),
        (('-c', '--project-class'),
         "Project class. Requires --module to be specified. "
         "The project class is resolved in the namespace of the module. "
         "This optional argument corresponds to the __class__ entry of the run-file. "),
        (('-o', '--output-dir'),
         "Output directory. "
         "This optional argument overrides the directories['output'] entry of the run-file."),
        (('-j', '--job-name'),
         "Job name. Should be short and valid as a part of directory and file names. "
         "If a persistent database is used, it must not exist in the database yet. "
         "This optional argument overrides the job_name of the run-file."),
    )

    cli = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        description=description)
    for flags, help_text in option_table:
        cli.add_argument(*flags, help=help_text)

    return cli
|
|
|
|
|
|
def parse_cli():
    """
    parse the command line of the current process

    the arguments are parsed by the parser from get_cli_parser.
    arguments which the parser does not recognize do not cause an error,
    they are returned separately.

    @return: tuple (args, unknown_args).
        args is the Namespace object created by the argument parser,
        unknown_args is the list of unrecognized argument strings.
    """
    return get_cli_parser().parse_known_args()
|
|
|
|
|
|
def main(symbols: typing.Optional[typing.Dict[str, typing.Any]] = None):
    """
    Main function with command line parsing

    This function starts the whole process with parameters from the command line.

    If the command line contains a run-file parameter, it determines the project class and the project parameters.
    Without a run-file, the function starts from an empty project section
    which is filled from the other command line options.

    The project class can be specified either in the run-file, on the command line or the function arguments.
    If the run-file specifies a class name, that class is instantiated.

    The module, project class, output directory and job name given on the command line
    override the corresponding entries of the run-file.
    Unrecognized command line arguments are ignored.

    @param symbols: Namespace of the project module. Passed on to main_project.
    @return: None
    """

    args, unknown_args = parse_cli()

    # a missing option is None: open(None) would raise a TypeError, so it is checked beforehand.
    run_file = getattr(args, 'run_file', None)
    if run_file:
        rf = _load_runfile(run_file)
    else:
        rf = {'project': {}}

    project_section = rf['project']

    # (run-file key, name of the command line attribute)
    overrides = (
        ('__module__', 'module'),
        ('__class__', 'project_class'),
        ('job_name', 'job_name'),
    )
    for key, attr in overrides:
        value = getattr(args, attr, None)
        if value:
            project_section[key] = value

    # the directories section is optional in the run-file.
    # it is created on demand so that the override is not lost.
    output_dir = getattr(args, 'output_dir', None)
    if output_dir:
        project_section.setdefault('directories', {})['output'] = output_dir

    main_project(symbols=symbols, runfile=rf)
|
|
|
|
|
|
# script entry point: run the command line interface and terminate with exit status 0.
if __name__ == '__main__':
    main()
    raise SystemExit(0)
|