public release 3.0.0 - see README and CHANGES for details
@@ -8,16 +8,13 @@ python pmsco [pmsco-arguments]
 @endverbatim
 """
 
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
+from pathlib import Path
 import sys
-import os.path
 
-file_dir = os.path.dirname(__file__) or '.'
-root_dir = os.path.join(file_dir, '..')
-root_dir = os.path.abspath(root_dir)
-sys.path[0] = root_dir
+pmsco_root = Path(__file__).resolve().parent.parent
+if str(pmsco_root) not in sys.path:
+    sys.path.insert(0, str(pmsco_root))
 
 
 if __name__ == '__main__':
     import pmsco.pmsco
@@ -13,8 +13,9 @@ SHELL=/bin/sh
 .PHONY: all clean phagen
 
 FC?=gfortran
+FCOPTS?=-std=legacy
 F2PY?=f2py
-F2PYOPTS?=
+F2PYOPTS?=--f77flags=-std=legacy --f90flags=-std=legacy
 CC?=gcc
 CCOPTS?=
 SWIG?=swig
@@ -17,22 +17,20 @@ pip install --user periodictable
 
 @author Matthias Muntwiler
 
-@copyright (c) 2015-20 by Paul Scherrer Institut @n
+@copyright (c) 2015-21 by Paul Scherrer Institut @n
 Licensed under the Apache License, Version 2.0 (the "License"); @n
 you may not use this file except in compliance with the License.
 You may obtain a copy of the License at
     http://www.apache.org/licenses/LICENSE-2.0
 """
 
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
 import math
 import numpy as np
 import periodictable as pt
 import sys
 
+import pmsco.config as config
+
 ## default file format identifier
 FMT_DEFAULT = 0
 ## MSC file format identifier
@@ -227,13 +225,13 @@ class Cluster(object):
         """
         self.rmax = r
 
-    def build_element(self, index, element_number, x, y, z, emitter, charge=0., scatterer_class=0):
+    def build_element(self, index, element, x, y, z, emitter, charge=0., scatterer_class=0):
         """
         build a tuple in the format of the internal data array.
 
         @param index: (int) index
 
-        @param element_number: (int) chemical element number
+        @param element: chemical element number (int) or symbol (str)
 
         @param x, y, z: (float) atom coordinates in the cluster
 
@@ -243,7 +241,13 @@ class Cluster(object):
 
         @param scatterer_class: (int) scatterer class. default = 0.
         """
-        symbol = pt.elements[element_number].symbol
+        try:
+            element_number = int(element)
+            symbol = pt.elements[element_number].symbol
+        except ValueError:
+            symbol = element
+            element_number = pt.elements.symbol(symbol.strip()).number
+
         element = (index, element_number, symbol, scatterer_class, x, y, z, int(emitter), charge)
         return element
 
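
The new build_element() accepts the chemical element as an atomic number or a symbol. A standalone sketch of the same normalization using periodictable (the helper name is made up for illustration):

    import periodictable as pt

    def normalize_element(element):
        # accept an atomic number (int or numeric string) or a symbol
        try:
            element_number = int(element)
            symbol = pt.elements[element_number].symbol
        except ValueError:
            symbol = element
            element_number = pt.elements.symbol(symbol.strip()).number
        return element_number, symbol

    print(normalize_element(14))    # (14, 'Si')
    print(normalize_element('Si'))  # (14, 'Si')
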
@@ -251,7 +255,7 @@ class Cluster(object):
         """
         add a single atom to the cluster.
 
-        @param atomtype: (int) chemical element number
+        @param atomtype: chemical element number (int) or symbol (str)
 
         @param v_pos: (numpy.ndarray, shape = (3)) position vector
 
@@ -274,7 +278,7 @@ class Cluster(object):
         self.rmax (maximum distance from the origin).
         all atoms are non-emitters.
 
-        @param atomtype: (int) chemical element number
+        @param atomtype: chemical element number (int) or symbol (str)
 
         @param v_pos: (numpy.ndarray, shape = (3))
             position vector of the first atom (basis vector)
@@ -307,7 +311,7 @@ class Cluster(object):
         and z_surf (position of the surface).
         all atoms are non-emitters.
 
-        @param atomtype: (int) chemical element number
+        @param atomtype: chemical element number (int) or symbol (str)
 
         @param v_pos: (numpy.ndarray, shape = (3))
             position vector of the first atom (basis vector)
@@ -1133,7 +1137,7 @@ class Cluster(object):
         np.savetxt(f, data, fmt=file_format, header=header, comments="")
 
 
-class ClusterGenerator(object):
+class ClusterGenerator(config.ConfigurableObject):
     """
     cluster generator class.
 
@@ -1151,6 +1155,7 @@ class ClusterGenerator(object):
         @param project: reference to the project object.
             cluster generators may need to look up project parameters.
         """
+        super().__init__()
         self.project = project
 
     def count_emitters(self, model, index):
@@ -1258,7 +1263,7 @@ class LegacyClusterGenerator(ClusterGenerator):
     """
 
     def __init__(self, project):
-        super(LegacyClusterGenerator, self).__init__(project)
+        super().__init__(project)
 
     def count_emitters(self, model, index):
         """
pmsco/config.py (new file, 120 lines)
@@ -0,0 +1,120 @@
"""
|
||||
@package pmsco.config
|
||||
infrastructure for configurable objects
|
||||
|
||||
@author Matthias Muntwiler
|
||||
|
||||
@copyright (c) 2021 by Paul Scherrer Institut @n
|
||||
Licensed under the Apache License, Version 2.0 (the "License"); @n
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
"""
|
||||
|
||||
import collections.abc
|
||||
import functools
|
||||
import inspect
|
||||
import logging
|
||||
from pathlib import Path
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def resolve_path(path, dirs):
|
||||
"""
|
||||
resolve a file path by replacing placeholders
|
||||
|
||||
placeholders are enclosed in curly braces.
|
||||
values for all possible placeholders are provided in a dictionary.
|
||||
|
||||
@param path: str, Path or other path-like.
|
||||
example: '{work}/test/testfile.dat'.
|
||||
@param dirs: dictionary mapping placeholders to project paths.
|
||||
the paths can be str, Path or other path-like
|
||||
example: {'work': '/home/user/work'}
|
||||
@return: pathlib.Path object
|
||||
"""
|
||||
return Path(*(p.format(**dirs) for p in Path(path).parts))
|
||||
|
||||
|
||||
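A minimal usage sketch of the new resolve_path() helper; the placeholder names and paths are hypothetical:

    from pathlib import Path
    from pmsco.config import resolve_path

    dirs = {"work": "/home/user/work", "data": "/home/user/data"}
    p = resolve_path("{work}/test/testfile.dat", dirs)
    assert p == Path("/home/user/work/test/testfile.dat")
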
class ConfigurableObject(object):
    """
    Parent class for objects that can be configured by a run file

    the run file is a JSON file that contains object data in a nested dictionary structure.

    in the dictionary structure the keys are property or attribute names of the object to be initialized.
    keys starting with a non-alphabetic character (except for some special keys like __class__) are ignored.
    these can be used as comments, or they protect private attributes.

    the values can be numeric values, strings, lists or dictionaries.

    simple values are simply assigned using setattr.
    this may call a property setter if defined.

    lists are iterated. each item is appended to the attribute.
    the attribute must implement an append method in this case.

    if an item is a dictionary and contains the special key '__class__',
    an object of that class is instantiated and recursively initialized with the dictionary elements.
    this requires that the class can be found in the module scope passed to the parser methods,
    and that the class inherits from this class.

    cases that can't be covered easily using this mechanism
    should be implemented in a property setter.
    value-checking should also be done in a property setter (or the append method in sequence-like objects).
    """
    def __init__(self):
        pass

    def set_properties(self, module, data_dict, project):
        """
        set properties of this class.

        @param module: module reference that should be used to resolve class names.
            this is usually the project module.
        @param data_dict: dictionary of properties to set.
            see the class description for details.
        @param project: reference to the project object.
        @return: None
        """
        for key in data_dict:
            if key[0].isalpha():
                self.set_property(module, key, data_dict[key], project)

    def set_property(self, module, key, value, project):
        obj = self.parse_object(module, value, project)
        if hasattr(self, key):
            if obj is not None:
                if isinstance(obj, collections.abc.MutableSequence):
                    attr = getattr(self, key)
                    for item in obj:
                        attr.append(item)
                elif isinstance(obj, collections.abc.Mapping):
                    d = getattr(self, key)
                    if d is not None and isinstance(d, collections.abc.MutableMapping):
                        d.update(obj)
                    else:
                        setattr(self, key, obj)
                else:
                    setattr(self, key, obj)
            else:
                setattr(self, key, obj)
        else:
            logger.warning(f"class {self.__class__.__name__} does not have attribute {key}.")

    def parse_object(self, module, value, project):
        if isinstance(value, collections.abc.MutableMapping) and "__class__" in value:
            cn = value["__class__"].split('.')
            c = functools.reduce(getattr, cn, module)
            s = inspect.signature(c)
            if 'project' in s.parameters:
                o = c(project=project)
            else:
                o = c()
            o.set_properties(module, value, project)
        elif isinstance(value, collections.abc.MutableSequence):
            o = [self.parse_object(module, i, project) for i in value]
        else:
            o = value
        return o
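
A hedged sketch of how a run-file fragment maps onto a ConfigurableObject. The Sample class and the run_dict contents are made up for illustration; only set_properties() and the key-filtering rule come from the code above:

    import types
    import pmsco.config as config

    class Sample(config.ConfigurableObject):
        def __init__(self):
            super().__init__()
            self.rmax = 0.0          # plain attribute, set via setattr
            self.tags = []           # list attribute, extended via append

    # '_comment' starts with a non-alphabetic character and is ignored
    run_dict = {"rmax": 7.5, "tags": ["a", "b"], "_comment": "example"}

    obj = Sample()
    obj.set_properties(types.ModuleType("dummy"), run_dict, project=None)
    print(obj.rmax, obj.tags)        # 7.5 ['a', 'b']
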
@@ -4,16 +4,13 @@ calculation dispatcher.
 
 @author Matthias Muntwiler
 
-@copyright (c) 2015 by Paul Scherrer Institut @n
+@copyright (c) 2015-21 by Paul Scherrer Institut @n
 Licensed under the Apache License, Version 2.0 (the "License"); @n
 you may not use this file except in compliance with the License.
 You may obtain a copy of the License at
     http://www.apache.org/licenses/LICENSE-2.0
 """
 
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
 import os
 import os.path
 import datetime
@@ -21,10 +18,20 @@ import signal
 import collections
 import copy
 import logging
 import math
 
 from attrdict import AttrDict
-from mpi4py import MPI
+
+try:
+    from mpi4py import MPI
+    mpi_comm = MPI.COMM_WORLD
+    mpi_size = mpi_comm.Get_size()
+    mpi_rank = mpi_comm.Get_rank()
+except ImportError:
+    MPI = None
+    mpi_comm = None
+    mpi_size = 1
+    mpi_rank = 0
 
 from pmsco.helpers import BraceMessage as BMsg
 
 logger = logging.getLogger(__name__)
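
The guarded import above makes mpi4py optional; the rest of this commit replaces direct self._comm access with the module-level mpi_comm/mpi_size/mpi_rank fallbacks. A self-contained sketch of the same idiom:

    try:
        from mpi4py import MPI
        mpi_comm = MPI.COMM_WORLD
        mpi_size = mpi_comm.Get_size()
        mpi_rank = mpi_comm.Get_rank()
    except ImportError:
        MPI = None
        mpi_comm = None
        mpi_size = 1
        mpi_rank = 0

    # runs with or without mpi4py installed
    print(f"process {mpi_rank} of {mpi_size}, parallel={mpi_comm is not None}")
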
@@ -521,8 +528,7 @@ class MscoProcess(object):
     #
     # the default is 2 days after start.
 
-    def __init__(self, comm):
-        self._comm = comm
+    def __init__(self):
         self._project = None
         self._atomic_scattering = None
         self._multiple_scattering = None
@@ -829,12 +835,12 @@ class MscoMaster(MscoProcess):
     # the values are handlers.TaskHandler objects.
     # the objects can be accessed in attribute or dictionary notation.
 
-    def __init__(self, comm):
-        super(MscoMaster, self).__init__(comm)
+    def __init__(self):
+        super().__init__()
         self._pending_tasks = collections.OrderedDict()
         self._running_tasks = collections.OrderedDict()
         self._complete_tasks = collections.OrderedDict()
-        self._slaves = self._comm.Get_size() - 1
+        self._slaves = mpi_size - 1
         self._idle_ranks = []
         self.max_calculations = 1000000
         self._calculations = 0
@@ -879,8 +885,8 @@
         self._idle_ranks = list(range(1, self._running_slaves + 1))
 
         self._root_task = CalculationTask()
-        self._root_task.file_root = project.output_file
-        self._root_task.model = project.create_model_space().start
+        self._root_task.file_root = str(project.output_file)
+        self._root_task.model = project.model_space.start
 
         for level in self.task_levels:
             self.task_handlers[level] = project.handler_classes[level]()
@@ -1033,7 +1039,7 @@
            else:
                logger.debug("assigning task %s to rank %u", str(task.id), rank)
                self._running_tasks[task.id] = task
-               self._comm.send(task.get_mpi_message(), dest=rank, tag=TAG_NEW_TASK)
+               mpi_comm.send(task.get_mpi_message(), dest=rank, tag=TAG_NEW_TASK)
                self._calculations += 1
        else:
            if not self._finishing:
@@ -1055,7 +1061,7 @@
         while self._idle_ranks:
             rank = self._idle_ranks.pop()
             logger.debug("send finish tag to rank %u", rank)
-            self._comm.send(None, dest=rank, tag=TAG_FINISH)
+            mpi_comm.send(None, dest=rank, tag=TAG_FINISH)
             self._running_slaves -= 1
 
     def _receive_result(self):
@@ -1065,7 +1071,7 @@
         if self._running_slaves > 0:
             logger.debug("waiting for calculation result")
             s = MPI.Status()
-            data = self._comm.recv(source=MPI.ANY_SOURCE, tag=MPI.ANY_TAG, status=s)
+            data = mpi_comm.recv(source=MPI.ANY_SOURCE, tag=MPI.ANY_TAG, status=s)
 
             if s.tag == TAG_NEW_RESULT:
                 task_id = self._accept_task_done(data)
@@ -1185,8 +1191,8 @@ class MscoSlave(MscoProcess):
     #
     # typically, a task is aborted when an exception is encountered.
 
-    def __init__(self, comm):
-        super(MscoSlave, self).__init__(comm)
+    def __init__(self):
+        super().__init__()
         self._errors = 0
         self._max_errors = 5
 
@@ -1199,7 +1205,7 @@
         self._running = True
         while self._running:
             logger.debug("waiting for message")
-            data = self._comm.recv(source=0, tag=MPI.ANY_TAG, status=s)
+            data = mpi_comm.recv(source=0, tag=MPI.ANY_TAG, status=s)
             if s.tag == TAG_NEW_TASK:
                 logger.debug("received new task")
                 self.accept_task(data)
@@ -1229,17 +1235,17 @@
             logger.exception(BMsg("unhandled exception in calculation task {0}", task.id))
             self._errors += 1
             if self._errors <= self._max_errors:
-                self._comm.send(data, dest=0, tag=TAG_INVALID_RESULT)
+                mpi_comm.send(data, dest=0, tag=TAG_INVALID_RESULT)
             else:
                 logger.error("too many exceptions, aborting")
                 self._running = False
-                self._comm.send(data, dest=0, tag=TAG_ERROR_ABORTING)
+                mpi_comm.send(data, dest=0, tag=TAG_ERROR_ABORTING)
         else:
             logger.debug(BMsg("sending result of task {0} to master", result.id))
-            self._comm.send(result.get_mpi_message(), dest=0, tag=TAG_NEW_RESULT)
+            mpi_comm.send(result.get_mpi_message(), dest=0, tag=TAG_NEW_RESULT)
 
 
-def run_master(mpi_comm, project):
+def run_master(project):
     """
     initialize and run the master calculation loop.
 
@@ -1251,25 +1257,25 @@
     if an unhandled exception occurs, this function aborts the MPI communicator, killing all MPI processes.
     the caller will not have a chance to handle the exception.
 
-    @param mpi_comm: MPI communicator (mpi4py.MPI.COMM_WORLD).
-
     @param project: project instance (sub-class of project.Project).
     """
     try:
-        master = MscoMaster(mpi_comm)
+        master = MscoMaster()
         master.setup(project)
         master.run()
         master.cleanup()
     except (SystemExit, KeyboardInterrupt):
-        mpi_comm.Abort()
+        if mpi_comm:
+            mpi_comm.Abort()
         raise
     except Exception:
         logger.exception("unhandled exception in master calculation loop.")
-        mpi_comm.Abort()
+        if mpi_comm:
+            mpi_comm.Abort()
         raise
 
 
-def run_slave(mpi_comm, project):
+def run_slave(project):
     """
     initialize and run the slave calculation loop.
 
@@ -1282,12 +1288,10 @@
     unless it is a SystemExit or KeyboardInterrupt (where we expect that the master also receives the signal),
     the MPI communicator is aborted, killing all MPI processes.
 
-    @param mpi_comm: MPI communicator (mpi4py.MPI.COMM_WORLD).
-
     @param project: project instance (sub-class of project.Project).
     """
     try:
-        slave = MscoSlave(mpi_comm)
+        slave = MscoSlave()
         slave.setup(project)
         slave.run()
         slave.cleanup()
@@ -1295,7 +1299,8 @@
         raise
     except Exception:
         logger.exception("unhandled exception in slave calculation loop.")
-        mpi_comm.Abort()
+        if mpi_comm:
+            mpi_comm.Abort()
         raise
 
 
@@ -1307,12 +1312,9 @@ def run_calculations(project):
 
     @param project: project instance (sub-class of project.Project).
     """
-    mpi_comm = MPI.COMM_WORLD
-    mpi_rank = mpi_comm.Get_rank()
-
     if mpi_rank == 0:
         logger.debug("MPI rank %u setting up master loop", mpi_rank)
-        run_master(mpi_comm, project)
+        run_master(project)
     else:
         logger.debug("MPI rank %u setting up slave loop", mpi_rank)
-        run_slave(mpi_comm, project)
+        run_slave(project)
@@ -1,7 +0,0 @@
-/* EDAC interface for other programs */
-%module edac
-%{
-extern int run_script(char *scriptfile);
-%}
-
-extern int run_script(char *scriptfile);
File diff suppressed because it is too large
@@ -10,6 +10,8 @@ the binding energies are compiled from Gwyn Williams' web page
 (https://userweb.jlab.org/~gwyn/ebindene.html).
 please refer to the original web page or the x-ray data booklet
 for original sources, definitions and remarks.
+binding energies of gases are replaced by respective values of a common compound
+from the 'handbook of x-ray photoelectron spectroscopy' (physical electronics, inc., 1995).
 
 usage
 -----
@@ -52,15 +54,47 @@ from pmsco.compat import open
 index_energy = np.zeros(0)
 index_number = np.zeros(0)
 index_term = []
+default_data_path = os.path.join(os.path.dirname(__file__), "bindingenergy.json")
 
 
-def load_data():
-    data_path = os.path.join(os.path.dirname(__file__), "bindingenergy.json")
+def load_data(data_path=None):
+    """
+    load binding energy data from json file
+
+    the data file must be in the same format as generated by save_data.
+
+    @param file path of the data file. default: "bindingenergy.json" next to this module file
+
+    @return dictionary
+    """
+    if data_path is None:
+        data_path = default_data_path
     with open(data_path) as fp:
         data = json.load(fp)
     return data
 
 
+def save_data(data_path=None):
+    """
+    save binding energy data to json file
+
+    @param file path of the data file. default: "bindingenergy.json" next to this module file
+
+    @return None
+    """
+    if data_path is None:
+        data_path = default_data_path
+    data = {}
+    for element in pt.elements:
+        element_data = {}
+        for term, energy in element.binding_energy.items():
+            element_data[term] = energy
+        if element_data:
+            data[element.number] = element_data
+    with open(data_path, 'w', 'utf8') as fp:
+        json.dump(data, fp, sort_keys=True, indent='\t')
+
+
 def init(table, reload=False):
     if 'binding_energy' in table.properties and not reload:
         return
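
A hedged usage sketch of the refactored load_data()/save_data() pair, assuming the module lives at pmsco/elements/bindingenergy.py (the module path is not shown in this excerpt):

    from pmsco.elements import bindingenergy   # assumed module path

    data = bindingenergy.load_data()   # reads bindingenergy.json next to the module
    # keys are element numbers, values map spectroscopic terms to energies (eV)
    print(data)

    # save_data() serializes the table attached to periodictable's elements,
    # so init() must have populated element.binding_energy beforehand
    bindingenergy.save_data("/tmp/bindingenergy-copy.json")
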
@@ -142,6 +176,9 @@ def export_flat_text(f):
     """
     export the binding energies to a flat general text file.
 
+    the file has four space-separated columns `number`, `symbol`, `term`, `energy`.
+    column names are included in the first row.
+
     @param f: file path or open file object
     @return: None
     """
@@ -153,3 +190,23 @@
     else:
         with open(f, "w") as fi:
             export_flat_text(fi)
+
+
+def import_flat_text(f):
+    """
+    import binding energies from a flat general text file.
+
+    data is in space-separated columns.
+    the first row contains column names.
+    at least the columns `number`, `term`, `energy` must be present.
+
+    the function updates existing entries and appends entries of non-existing terms.
+    existing terms that are not listed in the file remain unchanged.
+
+    @param f: file path or open file object
+
+    @return: None
+    """
+    data = np.atleast_1d(np.genfromtxt(f, names=True, dtype=None, encoding="utf8"))
+    for d in data:
+        pt.elements[d['number']].binding_energy[d['term']] = d['energy']
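
A sketch of the flat text format consumed by import_flat_text(); the values are placeholders, and the binding_energy table must already be initialized (see init()) before importing:

    import io
    from pmsco.elements import bindingenergy   # assumed module path

    flat = io.StringIO(
        "number symbol term energy\n"
        "14 Si 2p3/2 99.8\n"
    )
    bindingenergy.import_flat_text(flat)
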
@@ -92,6 +92,8 @@ def get_cross_section(photon_energy, element, nlj):
     @return: (float) cross section in Mb.
     """
     nl = nlj[0:2]
+    if not hasattr(element, "photoionization"):
+        element = get_element(element)
     try:
         pet, cst = element.photoionization.cross_section[nl]
     except KeyError:
@@ -196,3 +198,11 @@ def plot_spectrum(photon_energy, elements, binding_energy=False, work_function=4
     ax.set_ylabel('intensity')
     ax.set_title(elements)
     return fig, ax
+
+
+def plot_cross_section(el, nlj):
+    energy = np.arange(100, 1500, 140)
+    cs = get_cross_section(energy, el, nlj)
+    fig, ax = plt.subplots()
+    ax.set_yscale("log")
+    ax.plot(energy, cs)
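
With the change above, get_cross_section() also accepts a symbol or atomic number in place of an element object. A hedged sketch; the enclosing module name is not visible in this excerpt, so `spectrum` is an assumed name:

    from pmsco.elements import spectrum   # assumed module name

    cs = spectrum.get_cross_section(800., "Cu", "2p3/2")   # photon energy in eV
    print(cs)   # cross section in Mb
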
pmsco/graphics/population.py (new file, 443 lines)
@@ -0,0 +1,443 @@
"""
|
||||
@package pmsco.graphics.population
|
||||
graphics rendering module for population dynamics.
|
||||
|
||||
the main function is render_genetic_chart().
|
||||
|
||||
this module is experimental.
|
||||
interface and implementation are subject to change.
|
||||
|
||||
@author Matthias Muntwiler, matthias.muntwiler@psi.ch
|
||||
|
||||
@copyright (c) 2021 by Paul Scherrer Institut @n
|
||||
Licensed under the Apache License, Version 2.0 (the "License"); @n
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
"""
|
||||
|
||||
import logging
|
||||
import numpy as np
|
||||
import os
|
||||
from pmsco.database import regular_params, special_params
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
try:
|
||||
from matplotlib.figure import Figure
|
||||
from matplotlib.backends.backend_agg import FigureCanvasAgg as FigureCanvas
|
||||
# from matplotlib.backends.backend_pdf import FigureCanvasPdf
|
||||
# from matplotlib.backends.backend_svg import FigureCanvasSVG
|
||||
except ImportError:
|
||||
Figure = None
|
||||
FigureCanvas = None
|
||||
logger.warning("error importing matplotlib. graphics rendering disabled.")
|
||||
|
||||
|
||||
def _default_range(pos):
|
||||
"""
|
||||
determine a default range from actual values.
|
||||
|
||||
@param pos: (numpy.ndarray) 1-dimensional structured array of parameter values.
|
||||
@return: range_min, range_max are dictionaries of the minimum and maximum values of each parameter.
|
||||
"""
|
||||
names = regular_params(pos.dtype.names)
|
||||
range_min = {}
|
||||
range_max = {}
|
||||
for name in names:
|
||||
range_min[name] = pos[name].min()
|
||||
range_max[name] = pos[name].max()
|
||||
return range_min, range_max
|
||||
|
||||
|
||||
def _prune_constant_params(pnames, range_min, range_max):
|
||||
"""
|
||||
remove constant parameters from the list and range
|
||||
|
||||
@param pnames: (list)
|
||||
@param range_min: (dict)
|
||||
@param range_max: (dict)
|
||||
@return:
|
||||
"""
|
||||
del_names = [name for name in pnames if range_max[name] <= range_min[name]]
|
||||
for name in del_names:
|
||||
pnames.remove(name)
|
||||
del range_min[name]
|
||||
del range_max[name]
|
||||
|
||||
|
||||
def render_genetic_chart(output_file, input_data_or_file, model_space=None, generations=None, title=None, cmap=None,
|
||||
canvas=None):
|
||||
"""
|
||||
produce a genetic chart from a given population.
|
||||
|
||||
a genetic chart is a pseudo-colour representation of the coordinates of each individual in the model space.
|
||||
the axes are the particle number and the model parameter.
|
||||
the colour is mapped from the relative position of a parameter value within the parameter range.
|
||||
|
||||
the chart should illustrate the diversity in the population.
|
||||
converged parameters will show similar colours.
|
||||
by comparing charts of different generations, the effect of the optimization algorithm can be examined.
|
||||
though the chart type is designed for the genetic algorithm, it may be useful for other algorithms as well.
|
||||
|
||||
the function requires input in one of the following forms:
|
||||
- a result (.dat) file or numpy structured array.
|
||||
the array must contain regular parameters, as well as the _particle and _gen columns.
|
||||
the function generates one chart per generation unless the generation argument is specified.
|
||||
- a population (.pop) file or numpy structured array.
|
||||
the array must contain regular parameters, as well as the _particle columns.
|
||||
- a pmsco.optimizers.population.Population object with valid data.
|
||||
|
||||
the graphics file format can be changed by providing a specific canvas. default is PNG.
|
||||
|
||||
this function requires the matplotlib module.
|
||||
if it is not available, the function raises an error.
|
||||
|
||||
@param output_file: path and base name of the output file without extension.
|
||||
a generation index and the file extension according to the file format are appended.
|
||||
@param input_data_or_file: a numpy structured ndarray of a population or result list from an optimization run.
|
||||
alternatively, the file path of a result file (.dat) or population file (.pop) can be given.
|
||||
file can be any object that numpy.genfromtxt() can handle.
|
||||
@param model_space: model space can be a pmsco.project.ModelSpace object,
|
||||
any object that contains the same min and max attributes as pmsco.project.ModelSpace,
|
||||
or a dictionary with to keys 'min' and 'max' that provides the corresponding ModelSpace dictionaries.
|
||||
by default, the model space boundaries are derived from the input data.
|
||||
if a model_space is specified, only the parameters listed in it are plotted.
|
||||
@param generations: (int or sequence) generation index or list of indices.
|
||||
this index is used in the output file name and for filtering input data by generation.
|
||||
if the input data does not contain the generation, no filtering is applied.
|
||||
by default, no filtering is applied, and one graph for each generation is produced.
|
||||
@param title: (str) title of the chart.
|
||||
the title is a {}-style format string, where {base} is the output file name and {gen} is the generation.
|
||||
default: derived from file name.
|
||||
@param cmap: (str) name of colour map supported by matplotlib.
|
||||
default is 'jet'.
|
||||
other good-looking options are 'PiYG', 'RdBu', 'RdYlGn', 'coolwarm'.
|
||||
@param canvas: a FigureCanvas class reference from a matplotlib backend.
|
||||
if None, the default FigureCanvasAgg is used which produces a bitmap file in PNG format.
|
||||
some other options are:
|
||||
matplotlib.backends.backend_pdf.FigureCanvasPdf or
|
||||
matplotlib.backends.backend_svg.FigureCanvasSVG.
|
||||
|
||||
@return (str) path and name of the generated graphics file.
|
||||
empty string if an error occurred.
|
||||
|
||||
@raise TypeError if matplotlib is not available.
|
||||
"""
|
||||
|
||||
try:
|
||||
pos = np.copy(input_data_or_file.pos)
|
||||
range_min = input_data_or_file.model_min
|
||||
range_max = input_data_or_file.model_max
|
||||
generations = [input_data_or_file.generation]
|
||||
except AttributeError:
|
||||
try:
|
||||
pos = np.atleast_1d(np.genfromtxt(input_data_or_file, names=True))
|
||||
except TypeError:
|
||||
pos = np.copy(input_data_or_file)
|
||||
range_min, range_max = _default_range(pos)
|
||||
pnames = regular_params(pos.dtype.names)
|
||||
|
||||
if model_space is not None:
|
||||
try:
|
||||
# a ModelSpace-like object
|
||||
range_min = model_space.min
|
||||
range_max = model_space.max
|
||||
except AttributeError:
|
||||
# a dictionary-like object
|
||||
range_min = model_space['min']
|
||||
range_max = model_space['max']
|
||||
try:
|
||||
pnames = range_min.keys()
|
||||
except AttributeError:
|
||||
pnames = range_min.dtype.names
|
||||
|
||||
pnames = list(pnames)
|
||||
_prune_constant_params(pnames, range_min, range_max)
|
||||
|
||||
if generations is None:
|
||||
try:
|
||||
generations = np.unique(pos['_gen'])
|
||||
except ValueError:
|
||||
pass
|
||||
|
||||
files = []
|
||||
path, base = os.path.split(output_file)
|
||||
if generations is not None and len(generations):
|
||||
if title is None:
|
||||
title = "{base} gen {gen}"
|
||||
|
||||
for generation in generations:
|
||||
idx = np.where(pos['_gen'] == generation)
|
||||
gpos = pos[idx]
|
||||
gtitle = title.format(base=base, gen=int(generation))
|
||||
out_filename = "{base}-{gen}".format(base=os.fspath(output_file), gen=int(generation))
|
||||
out_filename = _render_genetic_chart_2(out_filename, gpos, pnames, range_min, range_max,
|
||||
gtitle, cmap, canvas)
|
||||
files.append(out_filename)
|
||||
else:
|
||||
if title is None:
|
||||
title = "{base}"
|
||||
gtitle = title.format(base=base, gen="")
|
||||
out_filename = "{base}".format(base=os.fspath(output_file))
|
||||
out_filename = _render_genetic_chart_2(out_filename, pos, pnames, range_min, range_max, gtitle, cmap, canvas)
|
||||
files.append(out_filename)
|
||||
|
||||
return files
|
||||
|
||||
|
||||
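A usage sketch for render_genetic_chart(); the file names are hypothetical:

    from pmsco.graphics.population import render_genetic_chart

    # one chart per generation from a result file of an optimization run
    files = render_genetic_chart("work/genetic-chart", "work/myproject.dat", cmap="RdBu")
    print(files)   # e.g. ['work/genetic-chart-0.png', 'work/genetic-chart-1.png', ...]
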
def _render_genetic_chart_2(out_filename, pos, pnames, range_min, range_max, title, cmap, canvas):
    """
    internal part of render_genetic_chart()

    this function calculates the relative position in the model space,
    sorts the positions array by particle index,
    and calls plot_genetic_chart().

    @param out_filename:
    @param pos:
    @param pnames:
    @param range_max:
    @param range_min:
    @param cmap:
    @param canvas:
    @return: out_filename
    """
    spos = np.sort(pos, order='_particle')
    rpos2d = np.zeros((spos.shape[0], len(pnames)))
    for index, pname in enumerate(pnames):
        rpos2d[:, index] = (spos[pname] - range_min[pname]) / (range_max[pname] - range_min[pname])
    out_filename = plot_genetic_chart(out_filename, rpos2d, pnames, title=title, cmap=cmap, canvas=canvas)
    return out_filename


def plot_genetic_chart(filename, rpos2d, param_labels, title=None, cmap=None, canvas=None):
    """
    produce a genetic chart from the given data.

    a genetic chart is a pseudo-colour representation of the coordinates of each individual in the model space.
    the chart should highlight the amount of diversity in the population
    and - by comparing charts of different generations - the changes due to mutation.
    the axes are the model parameter (x) and particle number (y).
    the colour is mapped from the relative position of a parameter value within the parameter range.

    in contrast to render_genetic_chart() this function contains only the drawing code.
    it requires input in the final form and does not do any checks, conversion or processing.

    the graphics file format can be changed by providing a specific canvas. default is PNG.

    this function requires the matplotlib module.
    if it is not available, the function raises an error.

    @param filename: path and name of the output file without extension.
    @param rpos2d: (two-dimensional numpy array of numeric type)
        relative positions of the particles in the model space.
        dimension 0 (y-axis) is the particle index,
        dimension 1 (x-axis) is the parameter index (in the order given by param_labels).
        all values must be between 0 and 1.
    @param param_labels: (sequence) list or tuple of parameter names.
    @param title: (str) string to be printed as chart title. default is 'genetic chart'.
    @param cmap: (str) name of colour map supported by matplotlib.
        default is 'jet'.
        other good-looking options are 'PiYG', 'RdBu', 'RdYlGn', 'coolwarm'.
    @param canvas: a FigureCanvas class reference from a matplotlib backend.
        if None, the default FigureCanvasAgg is used which produces a bitmap file in PNG format.
        some other options are:
        matplotlib.backends.backend_pdf.FigureCanvasPdf or
        matplotlib.backends.backend_svg.FigureCanvasSVG.

    @raise TypeError if matplotlib is not available.
    """
    if canvas is None:
        canvas = FigureCanvas
    if cmap is None:
        cmap = 'jet'
    if title is None:
        title = 'genetic chart'

    fig = Figure()
    canvas(fig)
    ax = fig.add_subplot(111)
    im = ax.imshow(rpos2d, aspect='auto', cmap=cmap, origin='lower')
    im.set_clim((0.0, 1.0))
    ax.set_xticks(np.arange(len(param_labels)))
    ax.set_xticklabels(param_labels, rotation=45, ha="right", rotation_mode="anchor")
    ax.set_ylabel('particle')
    ax.set_title(title)
    cb = ax.figure.colorbar(im, ax=ax)
    cb.ax.set_ylabel("relative value", rotation=-90, va="bottom")

    out_filename = "{base}.{ext}".format(base=filename, ext=canvas.get_default_filetype())
    fig.savefig(out_filename)
    return out_filename


def render_swarm(output_file, input_data, model_space=None, title=None, cmap=None, canvas=None):
    """
    render a two-dimensional particle swarm population.

    this function generates a schematic rendering of a particle swarm in two dimensions.
    particles are represented by their position and velocity, indicated by an arrow.
    the model space is projected on the first two (or selected two) variable parameters.
    in the background, a scatter plot of results (dots with pseudocolor representing the R-factor) can be plotted.
    the chart type is designed for the particle swarm optimization algorithm.

    the function requires input in one of the following forms:
    - position (.pos), velocity (.vel) and result (.dat) files or the respective numpy structured arrays.
      the arrays must contain regular parameters, as well as the `_particle` column.
      the result file must also contain an `_rfac` column.
    - a pmsco.optimizers.population.Population object with valid data.

    the graphics file format can be changed by providing a specific canvas. default is PNG.

    this function requires the matplotlib module.
    if it is not available, the function raises an error.

    @param output_file: path and base name of the output file without extension.
        a generation index and the file extension according to the file format are appended.
    @param input_data: a pmsco.optimizers.population.Population object with valid data,
        or a sequence of position, velocity and result arrays.
        the arrays must be structured ndarrays corresponding to the respective Population members.
        alternatively, the arrays can be referenced as file paths
        in any format that numpy.genfromtxt() can handle.
    @param model_space: model space can be a pmsco.project.ModelSpace object,
        any object that contains the same min and max attributes as pmsco.project.ModelSpace,
        or a dictionary with to keys 'min' and 'max' that provides the corresponding ModelSpace dictionaries.
        by default, the model space boundaries are derived from the input data.
        if a model_space is specified, only the parameters listed in it are plotted.
    @param title: (str) title of the chart.
        the title is a {}-style format string, where {base} is the output file name and {gen} is the generation.
        default: derived from file name.
    @param cmap: (str) name of colour map supported by matplotlib.
        default is 'plasma'.
        other good-looking options are 'viridis', 'plasma', 'inferno', 'magma', 'cividis'.
    @param canvas: a FigureCanvas class reference from a matplotlib backend.
        if None, the default FigureCanvasAgg is used which produces a bitmap file in PNG format.
        some other options are:
        matplotlib.backends.backend_pdf.FigureCanvasPdf or
        matplotlib.backends.backend_svg.FigureCanvasSVG.

    @return (str) path and name of the generated graphics file.
        empty string if an error occurred.

    @raise TypeError if matplotlib is not available.
    """
    try:
        range_min = input_data.model_min
        range_max = input_data.model_max
        pos = np.copy(input_data.pos)
        vel = np.copy(input_data.vel)
        rfac = np.copy(input_data.results)
        generation = input_data.generation
    except AttributeError:
        try:
            pos = np.atleast_1d(np.genfromtxt(input_data[0], names=True))
            vel = np.atleast_1d(np.genfromtxt(input_data[1], names=True))
            rfac = np.atleast_1d(np.genfromtxt(input_data[2], names=True))
        except TypeError:
            pos = np.copy(input_data[0])
            vel = np.copy(input_data[1])
            rfac = np.copy(input_data[2])
        range_min, range_max = _default_range(rfac)
    pnames = regular_params(pos.dtype.names)

    if model_space is not None:
        try:
            # a ModelSpace-like object
            range_min = model_space.min
            range_max = model_space.max
        except AttributeError:
            # a dictionary-like object
            range_min = model_space['min']
            range_max = model_space['max']
        try:
            pnames = range_min.keys()
        except AttributeError:
            pnames = range_min.dtype.names

    pnames = list(pnames)
    _prune_constant_params(pnames, range_min, range_max)
    pnames = pnames[0:2]
    files = []
    if len(pnames) == 2:
        params = {pnames[0]: [range_min[pnames[0]], range_max[pnames[0]]],
                  pnames[1]: [range_min[pnames[1]], range_max[pnames[1]]]}
        out_filename = plot_swarm(output_file, pos, vel, rfac, params, title=title, cmap=cmap, canvas=canvas)
        files.append(out_filename)
    else:
        logging.warning("model space must be two-dimensional and non-degenerate.")

    return files


def plot_swarm(filename, pos, vel, rfac, params, title=None, cmap=None, canvas=None):
    """
    plot a two-dimensional particle swarm population.

    this is a sub-function of render_swarm() containing just the plotting commands.

    the graphics file format can be changed by providing a specific canvas. default is PNG.

    this function requires the matplotlib module.
    if it is not available, the function raises an error.

    @param filename: path and base name of the output file without extension.
        a generation index and the file extension according to the file format are appended.
    @param pos: structured ndarray containing the positions of the particles.
    @param vel: structured ndarray containing the velocities of the particles.
    @param rfac: structured ndarray containing positions and R-factor values.
        this array is independent of pos and vel.
        it can also be set to None if results should be suppressed.
    @param params: dictionary of two parameters to be plotted.
        the keys correspond to columns of the pos, vel and rfac arrays.
        the values are lists [minimum, maximum] that define the axis range.
    @param title: (str) title of the chart.
        the title is a {}-style format string, where {base} is the output file name and {gen} is the generation.
        default: derived from file name.
    @param cmap: (str) name of colour map supported by matplotlib.
        default is 'plasma'.
        other good-looking options are 'viridis', 'plasma', 'inferno', 'magma', 'cividis'.
    @param canvas: a FigureCanvas class reference from a matplotlib backend.
        if None, the default FigureCanvasAgg is used which produces a bitmap file in PNG format.
        some other options are:
        matplotlib.backends.backend_pdf.FigureCanvasPdf or
        matplotlib.backends.backend_svg.FigureCanvasSVG.

    @return (str) path and name of the generated graphics file.
        empty string if an error occurred.

    @raise TypeError if matplotlib is not available.
    """
    if canvas is None:
        canvas = FigureCanvas
    if cmap is None:
        cmap = 'plasma'
    if title is None:
        title = 'swarm map'

    pnames = list(params.keys())
    fig = Figure()
    canvas(fig)
    ax = fig.add_subplot(111)

    if rfac is not None:
        try:
            s = ax.scatter(rfac[params[0]], rfac[params[1]], s=5, c=rfac['_rfac'], cmap=cmap, vmin=0, vmax=1)
        except ValueError:
            # _rfac column missing
            pass
        else:
            cb = ax.figure.colorbar(s, ax=ax)
            cb.ax.set_ylabel("R-factor", rotation=-90, va="bottom")

    p = ax.plot(pos[pnames[0]], pos[pnames[1]], 'co')
    q = ax.quiver(pos[pnames[0]], pos[pnames[1]], vel[pnames[0]], vel[pnames[1]], color='c')
    ax.set_xlim(params[pnames[0]])
    ax.set_ylim(params[pnames[1]])
    ax.set_xlabel(pnames[0])
    ax.set_ylabel(pnames[1])
    ax.set_title(title)

    out_filename = "{base}.{ext}".format(base=filename, ext=canvas.get_default_filetype())
    fig.savefig(out_filename)
    return out_filename
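
A usage sketch for render_swarm() with file input; the file names are hypothetical:

    from pmsco.graphics.population import render_swarm

    files = render_swarm("work/swarm-map",
                         ("work/run.pos", "work/run.vel", "work/run.dat"))
    print(files)   # ['work/swarm-map.png'] if two variable parameters were found
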
@@ -7,16 +7,13 @@ interface and implementation are subject to change.
 
 @author Matthias Muntwiler, matthias.muntwiler@psi.ch
 
-@copyright (c) 2018 by Paul Scherrer Institut @n
+@copyright (c) 2018-21 by Paul Scherrer Institut @n
 Licensed under the Apache License, Version 2.0 (the "License"); @n
 you may not use this file except in compliance with the License.
 You may obtain a copy of the License at
     http://www.apache.org/licenses/LICENSE-2.0
 """
 
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
 import logging
 import math
 import numpy as np
@@ -135,9 +132,8 @@ def render_ea_scan(filename, data, scan_mode, canvas=None, is_modf=False):
         im.set_cmap("RdBu_r")
-        dhi = max(abs(dlo), abs(dhi))
-        dlo = -dhi
-        im.set_clim((dlo, dhi))
+        im.set_clim((-1., 1.))
         try:
             # requires matplotlib 2.1.0
             ti = cb.get_ticks()
             ti = [min(ti), 0., max(ti)]
             cb.set_ticks(ti)
@@ -213,9 +209,8 @@ def render_tp_scan(filename, data, canvas=None, is_modf=False):
         # im.set_cmap("coolwarm")
-        dhi = max(abs(dlo), abs(dhi))
-        dlo = -dhi
-        pc.set_clim((dlo, dhi))
+        pc.set_clim((-1., 1.))
         try:
             # requires matplotlib 2.1.0
             ti = cb.get_ticks()
             ti = [min(ti), 0., max(ti)]
             cb.set_ticks(ti)
@@ -226,9 +221,12 @@ def render_tp_scan(filename, data, canvas=None, is_modf=False):
         # im.set_cmap("inferno")
         # im.set_cmap("viridis")
         pc.set_clim((dlo, dhi))
-        ti = cb.get_ticks()
-        ti = [min(ti), max(ti)]
-        cb.set_ticks(ti)
+        try:
+            ti = cb.get_ticks()
+            ti = [min(ti), max(ti)]
+            cb.set_ticks(ti)
+        except AttributeError:
+            pass
 
     out_filename = "{0}.{1}".format(filename, canvas.get_default_filetype())
     fig.savefig(out_filename)
@@ -40,23 +40,20 @@ the scan and domain handlers call methods of the project class to invoke project
 
 @author Matthias Muntwiler, matthias.muntwiler@psi.ch
 
-@copyright (c) 2015-18 by Paul Scherrer Institut @n
+@copyright (c) 2015-21 by Paul Scherrer Institut @n
 Licensed under the Apache License, Version 2.0 (the "License"); @n
 you may not use this file except in compliance with the License.
 You may obtain a copy of the License at
     http://www.apache.org/licenses/LICENSE-2.0
 """
 
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
 import datetime
 from functools import reduce
 import logging
 import math
 import numpy as np
 import os
+from pathlib import Path
 
 from pmsco.compat import open
 import pmsco.data as md
@@ -377,7 +374,7 @@ class SingleModelHandler(ModelHandler):
         keys = [key for key in self.result]
         keys.sort(key=lambda t: t[0].lower())
         vals = (str(self.result[key]) for key in keys)
-        filename = self._project.output_file + ".dat"
+        filename = Path(self._project.output_file).with_suffix(".dat")
         with open(filename, "w") as outfile:
             outfile.write("# ")
             outfile.write(" ".join(keys))
@@ -437,11 +434,11 @@ class ScanHandler(TaskHandler):
 
         if project.combined_scan is not None:
             ext = md.format_extension(project.combined_scan)
-            filename = project.output_file + ext
+            filename = Path(project.output_file).with_suffix(ext)
             md.save_data(filename, project.combined_scan)
         if project.combined_modf is not None:
             ext = md.format_extension(project.combined_modf)
-            filename = project.output_file + ".modf" + ext
+            filename = Path(project.output_file).with_suffix(".modf" + ext)
             md.save_data(filename, project.combined_modf)
 
         return len(self._project.scans)
@@ -695,7 +692,7 @@ class EmitterHandler(TaskHandler):
         the estimate is based on the start parameters, scan 0 and domain 0.
         """
         super(EmitterHandler, self).setup(project, slots)
-        mock_model = self._project.create_model_space().start
+        mock_model = self._project.model_space.start
         mock_index = dispatch.CalcID(-1, 0, 0, -1, -1)
         n_emitters = project.cluster_generator.count_emitters(mock_model, mock_index)
         return n_emitters
@@ -304,7 +304,7 @@ class GridSearchHandler(handlers.ModelHandler):
         super(GridSearchHandler, self).setup(project, slots)
 
         self._pop = GridPopulation()
-        self._pop.setup(self._project.create_model_space())
+        self._pop.setup(self._project.model_space)
         self._invalid_limit = max(slots, self._invalid_limit)
 
         self._outfile = open(self._project.output_file + ".dat", "w")
@@ -554,7 +554,7 @@ class Population(object):
         however, the patch is applied only upon the next execution of advance_population().
 
         an info or warning message is printed to the log
-        depending on whether the filed contained a complete dataset or not.
+        depending on whether the file contained a complete dataset or not.
 
         @attention patching a live population is a potentially dangerous operation.
             it may cause an optimization to abort because of an error in the file.
@@ -1209,7 +1209,7 @@ class PopulationHandler(handlers.ModelHandler):
         return self._pop_size
 
     def setup_population(self):
-        self._pop.setup(self._pop_size, self._project.create_model_space(), **self._project.optimizer_params)
+        self._pop.setup(self._pop_size, self._project.model_space, **self._project.optimizer_params)
 
     def cleanup(self):
         super(PopulationHandler, self).cleanup()
pmsco/pmsco.py (352 changed lines)
@@ -6,12 +6,12 @@ PEARL Multiple-Scattering Calculation and Structural Optimization
 
 this is the top-level interface of the PMSCO package.
 all calculations (any mode, any project) start by calling the run_project() function of this module.
-the module also provides a command line parser for common options.
+the module also provides a command line and a run-file/run-dict interface.
 
 for parallel execution, prefix the command line with mpi_exec -np NN, where NN is the number of processes to use.
 note that in parallel mode, one process takes the role of the coordinator (master).
 the master does not run calculations and is idle most of the time.
-to benefit from parallel execution on a work station, NN should be the number of processors plus one.
+to benefit from parallel execution on a work station, NN should be the number of processors.
 on a cluster, the number of processes is chosen according to the available resources.
 
 all calculations can also be run in a single process.
@@ -25,26 +25,35 @@ refer to the projects folder for examples.
 
 @author Matthias Muntwiler, matthias.muntwiler@psi.ch
 
-@copyright (c) 2015-18 by Paul Scherrer Institut @n
+@copyright (c) 2015-21 by Paul Scherrer Institut @n
 Licensed under the Apache License, Version 2.0 (the "License"); @n
 you may not use this file except in compliance with the License.
 You may obtain a copy of the License at
     http://www.apache.org/licenses/LICENSE-2.0
 """
 
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
 import argparse
-from builtins import range
 import datetime
 import logging
+import importlib
 import os.path
+import commentjson as json
+from pathlib import Path
 import sys
 
-from mpi4py import MPI
+try:
+    from mpi4py import MPI
+    mpi_comm = MPI.COMM_WORLD
+    mpi_size = mpi_comm.Get_size()
+    mpi_rank = mpi_comm.Get_rank()
+except ImportError:
+    MPI = None
+    mpi_comm = None
+    mpi_size = 1
+    mpi_rank = 0
+
+pmsco_root = Path(__file__).resolve().parent.parent
+if str(pmsco_root) not in sys.path:
+    sys.path.insert(0, str(pmsco_root))
 
 import pmsco.dispatch as dispatch
 import pmsco.files as files
@@ -71,40 +80,36 @@ def setup_logging(enable=False, filename="pmsco.log", level="WARNING"):
 
     @param enable: (bool) True=enable logging to the specified file,
         False=do not generate a log (null handler).
-    @param filename: (string) path and name of the log file.
+    @param filename: (Path-like) path and name of the log file.
         if this process is part of an MPI communicator,
         the function inserts a dot and the MPI rank of this process before the extension.
+        if the filename is empty, logging is disabled.
     @param level: (string) name of the log level.
         must be the name of one of "DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL".
-        if empty or invalid, the function raises a ValueError.
+        if empty, logging is disabled.
+        if not a valid level, defaults to "WARNING".
     @return None
     """
-    numeric_level = getattr(logging, level.upper(), None)
-    if not isinstance(numeric_level, int):
-        raise ValueError('Invalid log level: %s' % level)
-
-    logger = logging.getLogger("")
-    logger.setLevel(numeric_level)
-
-    logformat = '%(asctime)s (%(name)s) %(levelname)s: %(message)s'
-    formatter = logging.Formatter(logformat)
+    enable = enable and str(filename) and level
+    numeric_level = getattr(logging, level.upper(), logging.WARNING)
+    root_logger = logging.getLogger()
+    root_logger.setLevel(numeric_level)
 
     if enable:
-        mpi_comm = MPI.COMM_WORLD
-        mpi_size = mpi_comm.Get_size()
         if mpi_size > 1:
-            mpi_rank = mpi_comm.Get_rank()
-            root, ext = os.path.splitext(filename)
-            filename = root + "." + str(mpi_rank) + ext
+            p = Path(filename)
+            filename = p.with_suffix(f".{mpi_rank}" + p.suffix)
 
+        log_format = '%(asctime)s (%(name)s) %(levelname)s: %(message)s'
+        formatter = logging.Formatter(log_format)
+
         handler = logging.FileHandler(filename, mode="w", delay=True)
         handler.setLevel(numeric_level)
 
         handler.setFormatter(formatter)
     else:
         handler = logging.NullHandler()
 
-    logger.addHandler(handler)
+    root_logger.addHandler(handler)
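
The rank-suffix logic above can be checked in isolation; a minimal sketch:

    from pathlib import Path

    mpi_rank = 3                     # stand-in for the module-level rank
    p = Path("pmsco.log")
    print(p.with_suffix(f".{mpi_rank}" + p.suffix))   # pmsco.3.log
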
@@ -124,67 +129,58 @@ def set_common_args(project, args):
 
     @return: None
     """
-    log_file = "pmsco.log"
-
     if args.data_dir:
         project.data_dir = args.data_dir
     if args.output_file:
-        project.set_output(args.output_file)
-        log_file = args.output_file + ".log"
+        project.output_file = args.output_file
     if args.db_file:
         project.db_file = args.db_file
-    if args.log_file:
-        log_file = args.log_file
-    setup_logging(enable=args.log_enable, filename=log_file, level=args.log_level)
-
-    logger.debug("creating project")
-    mode = args.mode.lower()
-    if mode in {'single', 'grid', 'swarm', 'genetic', 'table'}:
-        project.mode = mode
-    else:
-        logger.error("invalid optimization mode '%s'.", mode)
-
     if args.pop_size:
         project.optimizer_params['pop_size'] = args.pop_size
-
     if args.seed_file:
         project.optimizer_params['seed_file'] = args.seed_file
     if args.seed_limit:
         project.optimizer_params['seed_limit'] = args.seed_limit
     if args.table_file:
         project.optimizer_params['table_file'] = args.table_file
+    project.log_file = args.log_file
+    if args.log_level:
+        project.log_level = args.log_level
+    if not args.log_enable:
+        project.log_file = ""
+        project.log_level = ""
+    if args.mode:
+        project.mode = args.mode.lower()
     if args.time_limit:
-        project.set_timedelta_limit(datetime.timedelta(hours=args.time_limit))
-
+        project.time_limit = args.time_limit
     if args.keep_files:
-        if "all" in args.keep_files:
-            cats = set([])
-        else:
-            cats = files.FILE_CATEGORIES - set(args.keep_files)
-        cats -= {'report'}
-        if mode == 'single':
-            cats -= {'model'}
-        project.files.categories_to_delete = cats
-    if args.keep_levels > project.keep_levels:
-        project.keep_levels = args.keep_levels
-    if args.keep_best > project.keep_best:
-        project.keep_best = args.keep_best
+        project.keep_files = args.keep_files
+    if args.keep_levels:
+        project.keep_levels = max(args.keep_levels, project.keep_levels)
+    if args.keep_best:
+        project.keep_best = max(args.keep_best, project.keep_best)
 
 
 def run_project(project):
     """
     run a calculation project.
 
-    @param project:
-    @return:
+    the function sets up logging, validates the project, chooses the handler classes,
+    and passes control to the pmsco.dispatch module to run the calculations.
+
+    @param project: fully initialized project object.
+        the validate method is called as part of this function after setting up the logger.
+    @return: None
     """
     # log project arguments only in rank 0
-    mpi_comm = MPI.COMM_WORLD
-    mpi_rank = mpi_comm.Get_rank()
-
+    log_file = Path(project.log_file)
+    if not log_file.name:
+        log_file = Path(project.job_name).with_suffix(".log")
+    if log_file.name:
+        log_file.parent.mkdir(exist_ok=True)
+        log_level = project.log_level
+    else:
+        log_level = ""
+    setup_logging(enable=bool(log_level), filename=log_file, level=log_level)
     if mpi_rank == 0:
         project.log_project_args()
 
     project.validate()
 
     optimizer_class = None
     if project.mode == 'single':
         optimizer_class = handlers.SingleModelHandler
@ -221,6 +217,34 @@ def run_project(project):
|
||||
logger.error("undefined project, optimizer, or calculator.")
|
||||
|
||||
|
||||
def schedule_project(project, run_dict):
|
||||
"""
|
||||
schedule a calculation project.
|
||||
|
||||
the function validates the project and submits a job to the scheduler.
|
||||
|
||||
@param project: fully initialized project object.
|
||||
the validate method is called as part of this function.
|
||||
|
||||
@param run_dict: dictionary holding the contents of the run file.
|
||||
|
||||
@return: None
|
||||
"""
|
||||
assert mpi_rank == 0
|
||||
setup_logging(enable=False)
|
||||
|
||||
project.validate()
|
||||
|
||||
schedule_dict = run_dict['schedule']
|
||||
module = importlib.import_module(schedule_dict['__module__'])
|
||||
schedule_class = getattr(module, schedule_dict['__class__'])
|
||||
schedule = schedule_class(project)
|
||||
schedule.set_properties(module, schedule_dict, project)
|
||||
schedule.run_dict = run_dict
|
||||
schedule.validate()
|
||||
schedule.submit()
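
a hedged sketch (not part of the commit) of the run_dict consumed by schedule_project; the project module and class names are placeholders, and the schedule keys mirror the SlurmSchedule/PsiRaSchedule properties defined in pmsco/schedule.py below:

# hypothetical run_dict, equivalent to a JSON run file
example_run_dict = {
    'project': {
        '__module__': 'projects.demo.demo_project',   # hypothetical project module
        '__class__': 'DemoProject',                   # optional; else create_project() is used
    },
    'schedule': {
        'enabled': True,
        '__module__': 'pmsco.schedule',
        '__class__': 'PsiRaSchedule',
        'nodes': 1,
        'tasks_per_node': 24,
        'wall_time': '1-12:00:00',                    # [days-]hours[:minutes[:seconds]]
        'manual': False,
    },
}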


class Args(object):
    """
    arguments of the main function.

@ -233,7 +257,7 @@ class Args(object):
    values as the command line parser.
    """

    def __init__(self, mode="single", output_file="pmsco_data"):
    def __init__(self):
        """
        constructor.

@ -242,12 +266,8 @@ class Args(object):
        other parameters may be required depending on the project
        and/or the calculation mode.
        """
        self.mode = mode
        self.pop_size = 0
        self.seed_file = ""
        self.seed_limit = 0
        self.data_dir = ""
        self.output_file = output_file
        self.output_file = ""
        self.db_file = ""
        self.time_limit = 24.0
        self.keep_files = files.FILE_CATEGORIES_TO_KEEP
@ -256,13 +276,9 @@ class Args(object):
        self.log_level = "WARNING"
        self.log_file = ""
        self.log_enable = True
        self.table_file = ""
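
a usage sketch (not part of the commit): an Args instance can stand in for parsed command-line arguments when pmsco is driven from a script; attribute names mirror the parser options:

# hypothetical programmatic invocation without a command line
args = Args()
args.mode = 'single'
args.output_file = 'out/demo'    # assumed path
args.log_enable = False
set_common_args(project, args)   # project: an initialized Project instance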


def get_cli_parser(default_args=None):
    if not default_args:
        default_args = Args()

def get_cli_parser():
    KEEP_FILES_CHOICES = files.FILE_CATEGORIES | {'all'}

    parser = argparse.ArgumentParser(
@ -290,56 +306,45 @@ def get_cli_parser(default_args=None):
    # for simplicity, the parser does not check these requirements.
    # all parameters are optional and accepted regardless of mode.
    # errors may occur if implicit requirements are not met.
    parser.add_argument('project_module',
    parser.add_argument('project_module', nargs='?',
                        help="path to custom module that defines the calculation project")
    parser.add_argument('-m', '--mode', default=default_args.mode,
    parser.add_argument('-r', '--run-file',
                        help="path to run-time parameters file which contains all program arguments. " +
                             "must be in JSON format.")
    parser.add_argument('-m', '--mode',
                        choices=['single', 'grid', 'swarm', 'genetic', 'table'],
                        help='calculation mode')
    parser.add_argument('--pop-size', type=int, default=default_args.pop_size,
                        help='population size (number of particles) in swarm or genetic optimization mode. ' +
                             'default is the greater of 4 or the number of calculation processes.')
    parser.add_argument('--seed-file',
                        help='path and name of population seed file. ' +
                             'population data of previous optimizations can be used to seed a new optimization. ' +
                             'the file must have the same structure as the .pop or .dat files.')
    parser.add_argument('--seed-limit', type=int, default=default_args.seed_limit,
                        help='maximum number of models to use from the seed file. ' +
                             'the models with the best R-factors are selected.')
    parser.add_argument('-d', '--data-dir', default=default_args.data_dir,
    parser.add_argument('-d', '--data-dir',
                        help='directory path for experimental data files (if required by project). ' +
                             'default: working directory')
    parser.add_argument('-o', '--output-file', default=default_args.output_file,
    parser.add_argument('-o', '--output-file',
                        help='base path for intermediate and output files.')
    parser.add_argument('-b', '--db-file', default=default_args.db_file,
    parser.add_argument('-b', '--db-file',
                        help='name of an sqlite3 database file where the results should be stored.')
    parser.add_argument('--table-file',
                        help='path and name of population table file for table optimization mode. ' +
                             'the file must have the same structure as the .pop or .dat files.')
    parser.add_argument('-k', '--keep-files', nargs='*', default=default_args.keep_files,
    parser.add_argument('-k', '--keep-files', nargs='*',
                        choices=KEEP_FILES_CHOICES,
                        help='output file categories to keep after the calculation. '
                             'by default, cluster and model (simulated data) '
                             'of a limited number of best models are kept.')
    parser.add_argument('--keep-best', type=int, default=default_args.keep_best,
    parser.add_argument('--keep-best', type=int,
                        help='number of best models for which to keep result files '
                             '(at each node from root down to keep-levels).')
    parser.add_argument('--keep-levels', type=int, choices=range(5),
                        default=default_args.keep_levels,
                        help='task level down to which result files of best models are kept. '
                             '0 = model, 1 = scan, 2 = domain, 3 = emitter, 4 = region.')
    parser.add_argument('-t', '--time-limit', type=float, default=default_args.time_limit,
    parser.add_argument('-t', '--time-limit', type=float,
                        help='wall time limit in hours. the optimizers try to finish before the limit.')
    parser.add_argument('--log-file', default=default_args.log_file,
    parser.add_argument('--log-file',
                        help='name of the main log file. ' +
                             'under MPI, the rank of the process is inserted before the extension.')
    parser.add_argument('--log-level', default=default_args.log_level,
    parser.add_argument('--log-level',
                        help='minimum level of log messages. DEBUG, INFO, WARNING, ERROR, CRITICAL.')
    feature_parser = parser.add_mutually_exclusive_group(required=False)
    feature_parser.add_argument('--log-enable', dest='log_enable', action="store_true",
                                help="enable logging. by default, logging is on.")
    feature_parser.add_argument('--log-disable', dest='log_enable', action='store_false',
                                help="disable logging. by default, logging is on.")
    parser.set_defaults(log_enable=default_args.log_enable)
    parser.set_defaults(log_enable=True)

    return parser

@ -350,52 +355,135 @@ def parse_cli():

    @return: Namespace object created by the argument parser.
    """
    default_args = Args()
    parser = get_cli_parser(default_args)
    parser = get_cli_parser()

    args, unknown_args = parser.parse_known_args()

    return args, unknown_args


def import_project_module(path):
def import_module(module_name):
    """
    import the custom project module.
    import a custom module by name.

    imports the project module given its file path.
    the path is expanded to its absolute form and appended to the python path.
    import a module given its file path or module name (like in an import statement).

    @param path: path and name of the module to be loaded.
        path is optional and defaults to the python path.
        if the name includes an extension, it is stripped off.
    preferably, the module name should be given as in an import statement.
    as the top-level pmsco directory is on the python path,
    the module name will begin with `projects` for a custom project module or `pmsco` for a core pmsco module.
    in this case, the function just calls importlib.import_module.

    if a file path is given, i.e., `module_name` links to an existing file and has a `.py` extension,
    the function extracts the directory path,
    inserts it into the python path,
    and calls importlib.import_module on the stem of the file name.

    @note the file path remains in the python path.
        this option should be used carefully to avoid breaking file name resolution.

    @param module_name: file path or module name.
        file path is interpreted relative to the working directory.

    @return: the loaded module as a python object
    """
    path, name = os.path.split(path)
    name, __ = os.path.splitext(name)
    path = os.path.abspath(path)
    sys.path.append(path)
    project_module = importlib.import_module(name)
    return project_module
    p = Path(module_name)
    if p.is_file() and p.suffix == ".py":
        path = p.parent.resolve()
        module_name = p.stem
        if str(path) not in sys.path:
            sys.path.insert(0, str(path))

    module = importlib.import_module(module_name)
    return module
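
both call forms of import_module resolve to the same module; a sketch with a hypothetical project path:

# import-statement style (preferred; the pmsco root is already on the python path)
module = import_module("projects.demo.demo_project")
# file-path style (the directory is inserted into sys.path as a side effect)
module = import_module("projects/demo/demo_project.py")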


def main_dict(run_params):
    """
    main function with dictionary run-time parameters

    this starts the whole process with all direct parameters.
    the command line is not parsed.
    no run-file is loaded (just the project module).

    @param run_params: dictionary with the same structure as the JSON run-file.

    @return: None
    """
    project_params = run_params['project']

    module = importlib.import_module(project_params['__module__'])
    try:
        project_class = getattr(module, project_params['__class__'])
    except KeyError:
        project = module.create_project()
    else:
        project = project_class()

    project._module = module
    project.directories['pmsco'] = Path(__file__).parent
    project.directories['project'] = Path(module.__file__).parent
    project.set_properties(module, project_params, project)
    run_project(project)
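
a minimal sketch of a main_dict call; the module name and parameters are placeholders, assuming set_properties applies the remaining keys as project attributes:

main_dict({
    'project': {
        '__module__': 'projects.demo.demo_project',   # hypothetical module
        'mode': 'single',
        'output_file': 'out/demo',                    # hypothetical output base path
    }
})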


def main():
    """
    main function with command line parsing

    this function starts the whole process with parameters from the command line.

    if the command line contains a run-file parameter, it determines the module to load and the project parameters.
    otherwise, the command line parameters apply.

    the project class can be specified either in the run-file or the project module.
    if the run-file specifies a class name, that class is looked up in the project module and instantiated.
    otherwise, the module's create_project is called.

    @return: None
    """
    args, unknown_args = parse_cli()

    if args:
        module = import_project_module(args.project_module)
        try:
            project_args = module.parse_project_args(unknown_args)
        except NameError:
            project_args = None
    try:
        with open(args.run_file, 'r') as f:
            rf = json.load(f)
    except AttributeError:
        rfp = {'__module__': args.project_module}
    else:
        rfp = rf['project']

    module = import_module(rfp['__module__'])
    try:
        project_args = module.parse_project_args(unknown_args)
    except AttributeError:
        project_args = None

    try:
        project_class = getattr(module, rfp['__class__'])
    except (AttributeError, KeyError):
        project = module.create_project()
        set_common_args(project, args)
        try:
            module.set_project_args(project, project_args)
        except NameError:
            pass
    else:
        project = project_class()
        project_args = None

    project._module = module
    project.directories['pmsco'] = Path(__file__).parent
    project.directories['project'] = Path(module.__file__).parent
    project.set_properties(module, rfp, project)

    set_common_args(project, args)
    try:
        if project_args:
            module.set_project_args(project, project_args)
    except AttributeError:
        pass

    try:
        schedule_enabled = rf['schedule']['enabled']
    except KeyError:
        schedule_enabled = False
    if schedule_enabled:
        schedule_project(project, rf)
    else:
        run_project(project)

592	pmsco/project.py
@ -19,36 +19,32 @@ the ModelSpace and CalculatorParams classes are typically used unchanged.

@author Matthias Muntwiler, matthias.muntwiler@psi.ch

@copyright (c) 2015 by Paul Scherrer Institut @n
@copyright (c) 2015-21 by Paul Scherrer Institut @n
Licensed under the Apache License, Version 2.0 (the "License"); @n
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
    http://www.apache.org/licenses/LICENSE-2.0
"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import collections
import copy
import datetime
import git
import logging
import numpy as np
import os.path
from pathlib import Path
import socket
import sys

from pmsco.calculators.calculator import InternalAtomicCalculator
from pmsco.calculators.edac import EdacCalculator
import pmsco.cluster as mc
import pmsco.cluster
import pmsco.config as config
from pmsco.compat import open
import pmsco.data as md
import pmsco.database as database
import pmsco.dispatch as dispatch
import pmsco.files as files
import pmsco.handlers as handlers
import pmsco.database
import pmsco.dispatch
import pmsco.files
import pmsco.handlers
from pmsco.helpers import BraceMessage as BMsg

logger = logging.getLogger(__name__)
@ -157,6 +153,34 @@ class ModelSpace(object):
        """
        return ParamSpace(self.start[name], self.min[name], self.max[name], self.step[name])

    def set_param_dict(self, d):
        """
        initialize model space from dictionary.

        @param d: dictionary with two levels:
            the top level are parameter names,
            the second level the space descriptors 'start', 'min', 'max', 'step' and 'width'.
            see add_param() for possible combinations.
        @return: None
        """
        self.__init__()
        for k, v in d.items():
            self.add_param(k, **v)

    def get_param_dict(self):
        """
        return model space parameters in dictionary form

        the top level are parameter names,
        the second level the space descriptors 'start', 'min', 'max' and 'step'.

        @return: dict
        """
        d = {}
        for name in self.start:
            d[name] = {'start': self.start[name], 'min': self.min[name],
                       'max': self.max[name], 'step': self.step[name]}
        return d
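
a round-trip sketch of the two dictionary methods (the parameter name is illustrative):

# illustrative round trip: dict -> ModelSpace -> dict
ms = ModelSpace()
ms.set_param_dict({'dAB': {'start': 2.1, 'min': 1.9, 'max': 2.3, 'step': 0.05}})
assert ms.get_param_dict()['dAB']['start'] == 2.1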


class CalculatorParams(object):
    """
@ -568,9 +592,166 @@ class Scan(object):
            self.raw_data[dim] = grid[i].reshape(-1)
        self.raw_data['i'] = 1

    def load(self):
        return self


class ScanKey(config.ConfigurableObject):
    """
    create a Scan object based on a project-supplied dictionary

    this class can be used in a run file to create a scan object based on the scan_dict attribute of the project.
    this may be convenient if your project should selectively use scans out of a long list of data files
    and you don't want to clutter up the run file with parameters that don't change.

    to do so, set the key property to match an item of scan_dict.
    the load method will look up the corresponding scan_dict item and construct the final Scan object.
    """
    def __init__(self, project=None):
        super().__init__()
        self.key = ""
        self.project = project

    def load(self, dirs=None):
        """
        load the selected scan as specified in the project's scan dictionary

        the method uses ScanLoader or ScanCreator as an intermediate.

        @return a new Scan object which contains the loaded data.
        """
        scan_spec = self.project.scan_dict[self.key]
        if 'positions' in scan_spec:
            loader = ScanCreator()
        else:
            loader = ScanLoader()
        for k, v in scan_spec.items():
            setattr(loader, k, v)
        scan = loader.load(dirs=dirs)
        return scan
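
a hedged sketch of how a project's scan_dict and a ScanKey cooperate (the file name and key are placeholders):

# hypothetical scan_dict entry defined by the project code
project.scan_dict['holo250'] = {
    'filename': '{data}/demo_holo_250.etpi',   # resolved against the directories dict
    'emitter': 'N',
    'initial_state': '1s',
}
# the run file then selects the scan by key only
sk = ScanKey(project)
sk.key = 'holo250'
scan = sk.load(dirs=project.directories)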


class ScanLoader(config.ConfigurableObject):
    """
    create a Scan object from a data file reference

    this class can be used in a run file to create a scan object from an experimental data file.
    to do so, fill the properties with values as documented.
    the load() method is called when the project is run.
    """

    ## @var filename (string)
    # file name from which the scan should be loaded.
    # the file name can contain a format specifier like {project} to include the base path.

    ## @var emitter (string)
    # chemical symbol and, optionally following, further specification (chemical state, environment, ...)
    # of photo-emitting atoms.
    # the interpretation of this string is up to the project and its cluster generator.
    # it should, however, always start with a chemical element symbol.
    #
    # examples: 'Ca' (calcium), 'CA' (carbon A), 'C a' (carbon a), 'C 1' (carbon one), 'N=O', 'FeIII'.

    ## @var initial_state (string)
    # nl term of initial state
    #
    # in the form expected by EDAC, for example: '2p1/2'

    ## @var is_modf (bool)
    # declares whether the data file contains the modulation function rather than intensity values
    #
    # if false, the project will calculate a modulation function from the raw data

    def __init__(self):
        super().__init__()
        self.filename = ""
        self.emitter = ""
        self.initial_state = "1s"
        self.is_modf = False

    def load(self, dirs=None):
        """
        load the scan according to specification

        create a new Scan object and load the file by calling Scan.import_scan_file().

        @return a new Scan object which contains the loaded data file.
        """
        scan = Scan()
        filename = config.resolve_path(self.filename, dirs)
        scan.import_scan_file(filename, self.emitter, self.initial_state)
        if self.is_modf:
            scan.modulation = scan.raw_data
        return scan


class ScanCreator(config.ConfigurableObject):
    """
    create a Scan object from string expressions

    this class can be used in a run file to create a scan object from python expressions,
    such as lists, ranges or numpy functions.
    to do so, fill the properties with values as documented.
    the load() method is called when the project is run.

    @note the raw_data property of the scan cannot be filled this way.
        thus, the class is useful in `single` calculation mode only.
    """

    ## @var filename (string)
    # name of the file which should receive the scan data.
    # the file name can contain a format specifier like {project} to include the base path.

    ## @var positions (dict)
    # dictionary specifying the scan positions
    #
    # the dictionary must contain four keys: 'e', 't', 'p', 'a' representing the four scan axes.
    # each key holds a string that contains a python expression.
    # the string is evaluated using python's built-in eval() function.
    # the expression must evaluate to an iterable object or numpy ndarray of the scan positions.
    # the `np` namespace can be used to access numpy functions.
    #
    # example:
    # the following dictionary generates a hemispherical scan
    # self.positions = {'e': '100', 't': 'np.linspace(0, 90, 91)', 'p': 'range(0, 360, 2)', 'a': '0'}

    ## @var emitter (string)
    # chemical symbol and, optionally following, further specification (chemical state, environment, ...)
    # of photo-emitting atoms.
    # the interpretation of this string is up to the project and its cluster generator.
    # it should, however, always start with a chemical element symbol.
    #
    # examples: 'Ca' (calcium), 'CA' (carbon A), 'C a' (carbon a), 'C 1' (carbon one), 'N=O', 'FeIII'.

    ## @var initial_state (string)
    # nl term of initial state
    #
    # in the form expected by EDAC, for example: '2p1/2'

    def __init__(self):
        super().__init__()
        self.filename = ""
        self.positions = {'e': None, 't': None, 'p': None, 'a': None}
        self.emitter = ""
        self.initial_state = "1s"

    def load(self, dirs=None):
        """
        create the scan according to specification

        @return a new Scan object which contains the created scan array.
        """
        scan = Scan()
        positions = {}
        for axis in self.positions.keys():
            positions[axis] = np.atleast_1d(np.asarray(eval(self.positions[axis])))
        scan.define_scan(positions, self.emitter, self.initial_state)
        scan.filename = config.resolve_path(self.filename, dirs)
        return scan


# noinspection PyMethodMayBeStatic
class Project(object):
class Project(config.ConfigurableObject):
    """
    base class of a calculation project.

@ -609,17 +790,18 @@ class Project(object):
    #

    ## @var scans (list of Scan objects)
    # list of experimental or scan files for which calculations are to be run.
    # list of experimental scans for which calculations are to be run.
    #
    # the list must be populated by calling the add_scan() method.
    # this should be done in the create_project() function, or through the command line arguments.
    # during project initialization, this list must be populated with Scan, ScanLoader or ScanCreator objects.
    # while Scan objects contain all scan data, the latter two classes contain only scan specifications
    # which are expanded (i.e. files are loaded or arrays are calculated) just before the calculations start.
    # the Project.add_scan() method is a short-cut to create the respective scan object from few arguments.
    # before the calculation starts, all objects are converted into fully specified Scan objects
    # and scan data is loaded or calculated.
    #
    # the modulation function is calculated internally.
    # if your scan files contain the modulation function (as opposed to intensity),
    # you must add the files in the create_project() function.
    # the command line does not support loading modulation functions.
    #
    # @c scans must be considered read-only. use project methods to change it.
    # there are two ways to fill this list:
    # either the project code fills it as a part of its initialization (create_project),
    # or the list is populated via the run-file.

    ## @var domains (list of arbitrary objects)
    # list of domains for which calculations are to be run.
@ -661,28 +843,22 @@ class Project(object):
    # set this argument to False only if the calculation is a continuation of a previous one
    # without any changes to the code.

    ## @var data_dir
    # directory path to experimental data.
    ## @var directories
    # dictionary for various directory paths.
    #
    # the project should load experimental data (scan files) from this path.
    # this attribute receives the --data-dir argument from the command line
    # if the project parses the common arguments (pmsco.set_common_args).
    #
    # it is up to the project to define where to load scan files from.
    # if the location of the files may depend on the machine or user account,
    # the user may want to specify the data path on the command line.

    ## @var output_dir (string)
    # directory path for data files produced during the calculation, including intermediate files.
    # home: user's home directory.
    # data: where to load experimental data (scan files) from.
    # project: directory of the project module.
    # output: where to write output and intermediate files.
    # temp: for temporary files.
    #
    # output_dir and output_file are set at once by @ref set_output.

    ## @var output_file (string)
    ## @var output_file (Path)
    # file name root for data files produced during the calculation, including intermediate files.
    #
    # the file name should include the path. the path must also be set in @ref output_dir.
    #
    # output_dir and output_file are set at once by @ref set_output.
    # this is the concatenation of self.directories['output'] and self.job_name.
    # assignment to this property will update the two basic attributes.

    ## @var db_file (string)
    # name of an sqlite3 database file where the calculation results should be stored.
@ -694,14 +870,17 @@ class Project(object):
    #
    # the actual wall time may be longer by the remaining time of running calculations.
    # running calculations will not be aborted.
    #
    # the time_limit property is an alternative representation as hours.
    # reading and writing accesses timedelta_limit.

    ## @var combined_scan
    # combined raw data from scans.
    # updated by add_scan().
    # updated by self.load_scans().

    ## @var combined_modf
    # combined modulation function from scans.
    # updated by add_scan().
    # updated by self.load_scans().

    ## @var files
    # list of all generated data files with metadata.
@ -741,14 +920,17 @@ class Project(object):
    #

    def __init__(self):
        super().__init__()
        self._module = None
        self.mode = "single"
        self.job_name = ""
        self.job_name = "pmsco0"
        self.job_tags = {}
        self.git_hash = ""
        self.description = ""
        self.features = {}
        self.cluster_format = mc.FMT_EDAC
        self.cluster_generator = mc.LegacyClusterGenerator(self)
        self.cluster_format = pmsco.cluster.FMT_EDAC
        self.cluster_generator = pmsco.cluster.LegacyClusterGenerator(self)
        self._model_space = None
        self.scans = []
        self.domains = []
        self.optimizer_params = {
@ -758,39 +940,170 @@ class Project(object):
            'recalc_seed': True,
            'table_file': ""
        }
        self.data_dir = ""
        self.output_dir = ""
        self.output_file = "pmsco_data"
        self.directories = {
            "home": Path.home(),
            "work": Path.cwd(),
            "data": "",
            "project": "",
            "output": "",
            "temp": ""}
        self.log_file = ""
        self.log_level = "WARNING"
        self.db_file = ':memory:'
        self.timedelta_limit = datetime.timedelta(days=1)
        self.combined_scan = None
        self.combined_modf = None
        self.files = files.FileTracker()
        self.files = pmsco.files.FileTracker()
        self.keep_files = list(pmsco.files.FILE_CATEGORIES_TO_KEEP)
        self.keep_levels = 1
        self.keep_best = 10
        self.handler_classes = {
            'model': handlers.SingleModelHandler,
            'scan': handlers.ScanHandler,
            'domain': handlers.DomainHandler,
            'emit': handlers.EmitterHandler,
            'region': handlers.SingleRegionHandler
            'model': pmsco.handlers.SingleModelHandler,
            'scan': pmsco.handlers.ScanHandler,
            'domain': pmsco.handlers.DomainHandler,
            'emit': pmsco.handlers.EmitterHandler,
            'region': pmsco.handlers.SingleRegionHandler
        }
        self.atomic_scattering_factory = InternalAtomicCalculator
        self.multiple_scattering_factory = EdacCalculator
        self._tasks_fields = []
        self._db = database.ResultsDatabase()
        self._db = pmsco.database.ResultsDatabase()

    def validate(self):
        """
        validate the project parameters before starting the calculations

        the method checks and fixes attributes that may cause trouble or go unnoticed if they are wrong.
        in addition, it fixes attributes which may be incomplete after loading a run-file.
        failed critical checks raise an exception (AssertionError, AttributeError, KeyError, ValueError).
        checks that cause an attribute to revert to its default are logged as warnings.

        the following attributes are fixed silently:
        - scattering factories that are declared as string are looked up in the project module.
        - place holders in the directories attribute are resolved.
        - place holders in the output_file attribute are resolved.
        - output_file and output_dir are made consistent (so that output_file includes output_dir).
        - the create_model_space() method is called if the model_space attribute is undefined.
        - scan data are loaded.

        @note to check the syntax of a run-file, set the calculation mode to 'validate' and run pmsco.
            this will pass the validate method but will stop execution before calculations are started.

        @raise AssertionError if a parameter is not correct.
        @raise AttributeError if a class name cannot be resolved.
        """
        assert self.mode in {"single", "swarm", "genetic", "grid", "table", "test", "validate"}

        if isinstance(self.atomic_scattering_factory, str):
            self.atomic_scattering_factory = getattr(self._module, self.atomic_scattering_factory)
        if isinstance(self.multiple_scattering_factory, str):
            self.multiple_scattering_factory = getattr(self._module, self.multiple_scattering_factory)

        self.directories = {k: config.resolve_path(Path(v), self.directories) for k, v in self.directories.items()}

        assert len(str(self.output_file))
        d = config.resolve_path(self.directories['output'], self.directories)
        f = config.resolve_path(self.output_file, self.directories)
        self.output_file = Path(d, f)
        self.directories['output'] = self.output_file.parent

        if self._model_space is None or not self._model_space.start:
            logger.warning("undefined model_space attribute, trying project's create_model_space")
            self._model_space = self.create_model_space()

        self.load_scans()

    @property
    def data_dir(self):
        return self.directories['data']

    @data_dir.setter
    def data_dir(self, path):
        self.directories['data'] = Path(path)

    @property
    def output_dir(self):
        return self.directories['output']

    @output_dir.setter
    def output_dir(self, path):
        self.directories['output'] = Path(path)

    @property
    def output_file(self):
        return Path(self.directories['output'], self.job_name)

    @output_file.setter
    def output_file(self, filename):
        """
        set path and base name of output file.

        path is copied to the output_dir attribute.
        the file stem is copied to the job_name attribute.

        @param filename: (PathLike)
        """
        p = Path(filename)
        s = str(p.parent)
        if s and s != ".":
            self.directories['output'] = p.parent
        s = str(p.stem)
        if s:
            self.job_name = s
        else:
            raise ValueError("invalid output file name")

    @property
    def time_limit(self):
        return self.timedelta_limit.total_seconds() / 3600

    @time_limit.setter
    def time_limit(self, hours):
        self.timedelta_limit = datetime.timedelta(hours=hours)

    def create_model_space(self):
        """
        create a project.ModelSpace object which defines the allowed range for model parameters.

        this method must be implemented by the actual project class.
        the ModelSpace object must declare all model parameters used in the project.
        there are three ways for a project to declare the model space:
        1. implement the @ref create_model_space method.
           this is the older way and may become deprecated in a future version.
        2. assign a ModelSpace to the self.model_space property directly
           (in the @ref validate method).
        3. declare the model space in the run-file.

        this method is called by the validate method only if self._model_space is undefined.

        @return ModelSpace object
        """
        return None

    @property
    def model_space(self):
        """
        ModelSpace object that defines the allowed range for model parameters.

        there are three ways for a project to declare the model space:
        1. implement the @ref create_model_space method.
           this is the older way and may become deprecated in a future version.
        2. assign a ModelSpace to the self.model_space property directly
           (in the @ref validate method).
        3. declare the model space in the run-file.

        initially, this property is None.
        """
        return self._model_space

    @model_space.setter
    def model_space(self, value):
        if isinstance(value, ModelSpace):
            self._model_space = value
        elif hasattr(value, 'items'):
            self._model_space = ModelSpace()
            self._model_space.set_param_dict(value)
        else:
            raise ValueError("incompatible object type")
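
the setter accepts a ready ModelSpace or a plain dictionary; a sketch with a hypothetical parameter:

# illustrative: assigning a dict converts it via ModelSpace.set_param_dict
project.model_space = {
    'dNN': {'start': 2.10, 'min': 1.90, 'max': 2.30, 'step': 0.05},
}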

    def create_params(self, model, index):
        """
        create a CalculatorParams object given the model parameters and calculation index.
@ -816,11 +1129,15 @@ class Project(object):
        self.combined_scan = None
        self.combined_modf = None

    def add_scan(self, filename, emitter, initial_state, is_modf=False, modf_model=None, positions=None):
    def add_scan(self, filename, emitter, initial_state, is_modf=False, positions=None):
        """
        add the file name of reference experiment and load it.

        the extension must be one of msc_data.DATATYPES (case insensitive)
        add a scan specification to the scans list.

        this is a shortcut for adding a ScanCreator or ScanLoader object to the self.scans list.
        the creator or loader are converted into full Scan objects just before the calculation starts
        (in the self.setup() method).

        the extension must be one of pmsco.data.DATATYPES (case insensitive)
        corresponding to the meaning of the columns in the file.

        caution: EDAC can only calculate equidistant, rectangular scans.
@ -831,9 +1148,6 @@ class Project(object):
        * intensity vs theta, phi, or alpha
        * intensity vs theta and phi (hemisphere or hologram scan)

        the method calculates the modulation function if @c is_modf is @c False.
        it also updates @c combined_scan and @c combined_modf which may be used as R-factor comparison targets.

        @param filename: (string) file name of the experimental data, possibly including a path.
            the file is not loaded when the optional positions argument is present,
            but the filename may serve as basename for output files (e.g. modulation function).
@ -852,57 +1166,64 @@ class Project(object):
        @param is_modf: (bool) declares whether the file contains the modulation function (True),
            or intensity (False, default). In the latter case, the modulation function is calculated internally.

        @param modf_model: (dict) model parameters to be passed to the modulation function.

        @return (Scan) the new scan object (which is also a member of self.scans).
        """
        scan = Scan()
        if positions is not None:
            scan.define_scan(positions, emitter, initial_state)
            scan.filename = filename
            scan = ScanCreator()
            scan.positions = positions
        else:
            scan.import_scan_file(filename, emitter, initial_state)
            scan = ScanLoader()
            scan.is_modf = is_modf

        scan.filename = filename
        scan.emitter = emitter
        scan.initial_state = initial_state
        self.scans.append(scan)

        if modf_model is None:
            modf_model = {}
        return scan

        if scan.raw_data is not None:
            if is_modf:
                scan.modulation = scan.raw_data
            else:

    def load_scans(self):
        """
        load all scan data.

        initially, the self.scans list may contain objects of different classes (Scan, ScanLoader, ScanCreator)
        depending on the project initialization.
        this method loads all data, so that the scans list contains only Scan objects.

        also, the self.combined_scan and self.combined_modf fields are calculated from the scans.
        """
        has_raw_data = True
        has_mod_func = True
        loaded_scans = []

        for idx, scan in enumerate(self.scans):
            scan = scan.load(dirs=self.directories)
            loaded_scans.append(scan)
            if scan.modulation is None:
                try:
                    scan.modulation = self.calc_modulation(scan.raw_data, modf_model)
                    scan.modulation = self.calc_modulation(scan.raw_data, self.model_space.start)
                except ValueError:
                    logger.error("error calculating the modulation function of experimental data.")
                    scan.modulation = None
            else:
                scan.modulation = None
                    logger.error(f"error calculating the modulation function of scan {idx}.")
            has_raw_data = has_raw_data and scan.raw_data is not None
            has_mod_func = has_mod_func and scan.modulation is not None
        self.scans = loaded_scans

        if scan.raw_data is not None:
            if self.combined_scan is not None:
                dt = md.common_dtype((self.combined_scan, scan.raw_data))
                d1 = md.restructure_data(self.combined_scan, dt)
                d2 = md.restructure_data(scan.raw_data, dt)
                self.combined_scan = np.hstack((d1, d2))
            else:
                self.combined_scan = scan.raw_data.copy()
        if has_raw_data:
            stack1 = [scan.raw_data for scan in self.scans]
            dtype = md.common_dtype(stack1)
            stack2 = [md.restructure_data(data, dtype) for data in stack1]
            self.combined_scan = np.hstack(tuple(stack2))
        else:
            self.combined_scan = None

        if scan.modulation is not None:
            if self.combined_modf is not None:
                dt = md.common_dtype((self.combined_modf, scan.modulation))
                d1 = md.restructure_data(self.combined_modf, dt)
                d2 = md.restructure_data(scan.modulation, dt)
                self.combined_modf = np.hstack((d1, d2))
            else:
                self.combined_modf = scan.modulation.copy()
        if has_mod_func:
            stack1 = [scan.modulation for scan in self.scans]
            dtype = md.common_dtype(stack1)
            stack2 = [md.restructure_data(data, dtype) for data in stack1]
            self.combined_modf = np.hstack(tuple(stack2))
        else:
            self.combined_modf = None

        return scan

    def clear_domains(self):
        """
        clear domains.
@ -933,42 +1254,6 @@ class Project(object):
        """
        self.domains.append(domain)

    def set_output(self, filename):
        """
        set path and base name of output file.

        path and name are copied to the output_file attribute.
        path is copied to the output_dir attribute.

        if the path is missing, the destination is the current working directory.
        """
        self.output_file = filename
        path, name = os.path.split(filename)
        self.output_dir = path
        self.job_name = name

    def set_timedelta_limit(self, timedelta, margin_minutes=10):
        """
        set the walltime limit with a safety margin.

        this method sets the internal self.timedelta_limit attribute.
        by default, a safety margin of 10 minutes is subtracted from the main argument
        in order to increase the probability that the process ends in time.
        if this is not wanted, the project class may override the method and provide its own margin.

        the method is typically called with the command line time limit from the main module.

        @note the safety margin could be applied at various levels.
            it is done here because it can easily be overridden by the project subclass.
            to keep run scripts simple, the command line can be given the same time limit
            as the job scheduler of the computing cluster.

        @param timedelta: (datetime.timedelta) max. duration of the calculation process (wall time).

        @param margin_minutes: (int) safety margin in minutes to subtract from timedelta.
        """
        self.timedelta_limit = timedelta - datetime.timedelta(minutes=margin_minutes)

    def log_project_args(self):
        """
        send some common project attributes to the log.
@ -981,6 +1266,14 @@ class Project(object):
        @return: None
        """
        try:
            for key in self.directories:
                val = self.directories[key]
                lev = logging.WARNING if val else logging.DEBUG
                logger.log(lev, f"directories['{key}']: {val}")

            logger.warning("output file: {0}".format(self.output_file))
            logger.warning("database: {0}".format(self.db_file))

            logger.warning("atomic scattering: {0}".format(self.atomic_scattering_factory))
            logger.warning("multiple scattering: {0}".format(self.multiple_scattering_factory))
            logger.warning("optimization mode: {0}".format(self.mode))
@ -990,15 +1283,11 @@ class Project(object):
                lev = logging.WARNING if val else logging.DEBUG
                logger.log(lev, "optimizer_params['{k}']: {v}".format(k=key, v=val))

            logger.warning("data directory: {0}".format(self.data_dir))
            logger.warning("output file: {0}".format(self.output_file))
            logger.warning("database: {0}".format(self.db_file))

            _files_to_keep = files.FILE_CATEGORIES - self.files.categories_to_delete
            _files_to_keep = pmsco.files.FILE_CATEGORIES - self.files.categories_to_delete
            logger.warning("intermediate files to keep: {0}".format(", ".join(_files_to_keep)))

            for idx, scan in enumerate(self.scans):
                logger.warning(f"scan {idx}: {scan.filename} ({scan.emitter} {scan.initial_state}")
                logger.warning(f"scan {idx}: {scan.filename} ({scan.emitter} {scan.initial_state})")
            for idx, dom in enumerate(self.domains):
                logger.warning(f"domain {idx}: {dom}")

@ -1247,16 +1536,26 @@ class Project(object):
        """
        self.git_hash = self.get_git_hash()
        fields = ["rfac"]
        fields.extend(dispatch.CalcID._fields)
        fields.extend(pmsco.dispatch.CalcID._fields)
        fields.append("secs")
        fields = ["_" + f for f in fields]
        mspace = self.create_model_space()
        model_fields = list(mspace.start.keys())
        model_fields = list(self.model_space.start.keys())
        model_fields.sort(key=lambda name: name.lower())
        fields.extend(model_fields)
        self._tasks_fields = fields

        with open(self.output_file + ".tasks.dat", "w") as outfile:
        if 'all' in self.keep_files:
            cats = set([])
        else:
            cats = pmsco.files.FILE_CATEGORIES - set(self.keep_files)
            cats -= {'report'}
            if self.mode == 'single':
                cats -= {'model'}
        self.files.categories_to_delete = cats

        Path(self.output_file).parent.mkdir(parents=True, exist_ok=True)
        tasks_file = Path(self.output_file).with_suffix(".tasks.dat")
        with open(tasks_file, "w") as outfile:
            outfile.write("# ")
            outfile.write(" ".join(fields))
            outfile.write("\n")
@ -1311,7 +1610,8 @@ class Project(object):
        values_dict['_rfac'] = parent_task.rfac
        values_dict['_secs'] = parent_task.time.total_seconds()
        values_list = [values_dict[field] for field in self._tasks_fields]
        with open(self.output_file + ".tasks.dat", "a") as outfile:
        tasks_file = Path(self.output_file).with_suffix(".tasks.dat")
        with open(tasks_file, "a") as outfile:
            outfile.write(" ".join(format(value) for value in values_list) + "\n")

        db_id = self._db.insert_result(parent_task.id, values_dict)
@ -1548,11 +1848,11 @@ class Project(object):
        """
        _files = {}
        xyz_filename = filename + ".xyz"
        cluster.save_to_file(xyz_filename, fmt=mc.FMT_XYZ)
        cluster.save_to_file(xyz_filename, fmt=pmsco.cluster.FMT_XYZ)
        _files[xyz_filename] = 'cluster'

        xyz_filename = filename + ".emit.xyz"
        cluster.save_to_file(xyz_filename, fmt=mc.FMT_XYZ, emitters_only=True)
        cluster.save_to_file(xyz_filename, fmt=pmsco.cluster.FMT_XYZ, emitters_only=True)
        _files[xyz_filename] = 'cluster'

        return _files

309	pmsco/schedule.py (new file)
@ -0,0 +1,309 @@
"""
@package pmsco.schedule
job schedule interface

this module defines common infrastructure to submit a pmsco calculation job to a job scheduler such as slurm.

the schedule can be defined as part of the run-file (see pmsco module).
users may derive sub-classes in a separate module to adapt to their own computing cluster.

the basic call sequence is:
1. create a schedule object.
2. initialize its properties with job parameters.
3. validate()
4. submit()

@author Matthias Muntwiler, matthias.muntwiler@psi.ch

@copyright (c) 2015-21 by Paul Scherrer Institut @n
Licensed under the Apache License, Version 2.0 (the "License"); @n
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
    http://www.apache.org/licenses/LICENSE-2.0
"""

import collections.abc
import commentjson as json
import datetime
import logging
from pathlib import Path
import shutil
import subprocess
import pmsco.config

logger = logging.getLogger(__name__)


class JobSchedule(pmsco.config.ConfigurableObject):
    """
    base class for job schedule

    this class defines the abstract interface and some utilities.
    derived classes may override any method, but should call the inherited method.

    usage:
    1. create object, assigning a project instance.
    2. assign run_file.
    3. call validate.
    4. call submit.

    this class' properties should not be listed in the run file - they will be overwritten.
    """

    ## @var enabled (bool)
    #
    # this parameter signals whether pmsco should schedule a job or run the calculation.
    # it is not directly used by the schedule classes but by the pmsco module.
    # it must be defined in the run file and set to true to submit the job to a scheduler.
    # it is set to false in the run file copied to the job directory so that the job script starts the calculation.

    def __init__(self, project):
        super(JobSchedule, self).__init__()
        self.project = project
        self.enabled = False
        self.run_dict = {}
        self.job_dir = Path()
        self.job_file = Path()
        self.run_file = Path()
        # directory that contains the pmsco and projects directories
        self.pmsco_root = Path(__file__).parent.parent

    def validate(self):
        """
        validate the job parameters.

        make sure all object attributes are correct for submission.

        @return: None
        """
        self.pmsco_root = Path(self.project.directories['pmsco']).parent
        output_dir = Path(self.project.directories['output'])

        assert self.pmsco_root.is_dir()
        assert (self.pmsco_root / "pmsco").is_dir()
        assert (self.pmsco_root / "projects").is_dir()
        assert output_dir.is_dir()
        assert self.project.job_name

        self.job_dir = output_dir / self.project.job_name
        self.job_dir.mkdir(parents=True, exist_ok=True)
        self.job_file = (self.job_dir / self.project.job_name).with_suffix(".sh")
        self.run_file = (self.job_dir / self.project.job_name).with_suffix(".json")

    def submit(self):
        """
        submit the job to the scheduler.

        as of this class, the method does the following:

        1. copy source files
        2. copy a patched version of the run file.
        3. write the job file (_write_job_file must be implemented by a derived class).

        @return: None
        """
        self._copy_source()
        self._fix_run_file()
        self._write_run_file()
        self._write_job_file()

    def _copy_source(self):
        """
        copy the source files to the job directory.

        the source_dir and job_dir attributes must be correct.
        the job_dir/pmsco directory must not exist and will be created.

        this is a utility method used internally by derived classes.

        job_dir/pmsco/pmsco/**
        job_dir/pmsco/projects/**
        job_dir/job.sh
        job_dir/job.json

        @return: None
        """

        source = self.pmsco_root
        dest = self.job_dir / "pmsco"
        ignore = shutil.ignore_patterns(".*", "~*", "*~")
        shutil.copytree(source / "pmsco", dest / "pmsco", ignore=ignore)
        shutil.copytree(source / "projects", dest / "projects", ignore=ignore)

    def _fix_run_file(self):
        """
        fix the run file.

        patch some entries of self.run_dict so that it can be used as run file.
        the following changes are made:
        1. set schedule.enabled to false so that the calculation is run.
        2. set the output directory to the job directory.
        3. set the log file to the job directory.

        @return: None
        """
        self.run_dict['schedule']['enabled'] = False
        self.run_dict['project']['directories']['output'] = str(self.job_dir)
        self.run_dict['project']['log_file'] = str((self.job_dir / self.project.job_name).with_suffix(".log"))

    def _write_run_file(self):
        """
        copy the run file.

        this is a JSON dump of self.run_dict to the self.run_file file.

        @return: None
        """
        with open(self.run_file, "wt") as f:
            json.dump(self.run_dict, f, indent=2)

    def _write_job_file(self):
        """
        create the job script.

        this method must be implemented by a derived class.
        the script must be written to the self.job_file file.
        don't forget to make the file executable.

        @return: None
        """
        pass


class SlurmSchedule(JobSchedule):
    """
    job schedule for a slurm scheduler.

    this class implements commonly used features of the slurm scheduler.
    host-specific features and the creation of the job file should be done in a derived class.
    derived classes must, in particular, implement the _write_job_file method.
    they can override other methods, too, but should call the inherited method first.

    1. copy the source trees (pmsco and projects) to the job directory
    2. copy a patched version of the run file.
    3. call the submission command

    the public properties of this class should be assigned from the run file.
    """
    def __init__(self, project):
        super(SlurmSchedule, self).__init__(project)
        self.host = ""
        self.nodes = 1
        self.tasks_per_node = 8
        self.wall_time = datetime.timedelta(hours=1)
        self.signal_time = 600
        self.manual = True

    @staticmethod
    def parse_timedelta(td):
        """
        parse time delta input formats

        converts a string or dictionary from run-file into datetime.timedelta.

        @param td:
            str: [days-]hours[:minutes[:seconds]]
            dict: days, hours, minutes, seconds - at least one needs to be defined. values must be numeric.
            datetime.timedelta - native type
        @return: datetime.timedelta
        """
        if isinstance(td, str):
            dt = {}
            d = td.split("-")
            if len(d) > 1:
                dt['days'] = float(d.pop(0))
            t = d[0].split(":")
            try:
                dt['hours'] = float(t.pop(0))
                dt['minutes'] = float(t.pop(0))
                dt['seconds'] = float(t.pop(0))
            except (IndexError, ValueError):
                pass
            td = datetime.timedelta(**dt)
        elif isinstance(td, collections.abc.Mapping):
            td = datetime.timedelta(**td)
        return td
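
the three input forms accepted by parse_timedelta, as a sketch:

# illustrative inputs, all equivalent to 1 day 12 hours
SlurmSchedule.parse_timedelta("1-12:00:00")
SlurmSchedule.parse_timedelta({'days': 1, 'hours': 12})
SlurmSchedule.parse_timedelta(datetime.timedelta(days=1, hours=12))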

    def validate(self):
        super(SlurmSchedule, self).validate()
        self.wall_time = self.parse_timedelta(self.wall_time)
        assert self.job_dir.is_absolute()

    def submit(self):
        """
        call the sbatch command

        if manual is true, the job files are generated but the job is not submitted.

        @return: None
        """
        super(SlurmSchedule, self).submit()
        args = ['sbatch', str(self.job_file)]
        print(" ".join(args))
        if self.manual:
            print("manual run - job files created but not submitted")
        else:
            cp = subprocess.run(args)
            cp.check_returncode()


class PsiRaSchedule(SlurmSchedule):
    """
    job schedule for the Ra cluster at PSI.

    this class selects specific features of the Ra cluster,
    such as the partition and node type (24 or 32 cores).
    it also implements the _write_job_file method.
    """

    ## @var partition (str)
    #
    # the partition is selected based on wall time and number of tasks by the validate() method.
    # it should not be listed in the run file.

    def __init__(self, project):
        super(PsiRaSchedule, self).__init__(project)
        self.partition = "shared"

    def validate(self):
        super(PsiRaSchedule, self).validate()
        assert self.nodes <= 2
        assert self.tasks_per_node <= 24 or self.tasks_per_node == 32
        assert self.wall_time.total_seconds() >= 60
        if self.wall_time.total_seconds() > 24 * 60 * 60:
            self.partition = "week"
        elif self.tasks_per_node < 24:
            self.partition = "shared"
        else:
            self.partition = "day"
        assert self.partition in ["day", "week", "shared"]

    def _write_job_file(self):
        lines = []

        lines.append('#!/bin/bash')
        lines.append('#SBATCH --export=NONE')
        lines.append(f'#SBATCH --job-name="{self.project.job_name}"')
        lines.append(f'#SBATCH --partition={self.partition}')
        lines.append(f'#SBATCH --time={int(self.wall_time.total_seconds() / 60)}')
        lines.append(f'#SBATCH --nodes={self.nodes}')
        lines.append(f'#SBATCH --ntasks-per-node={self.tasks_per_node}')
        if self.tasks_per_node > 24:
            lines.append('#SBATCH --cores-per-socket=16')
        # 0 - 65535 seconds
        # currently, PMSCO does not react to signals properly
        # lines.append(f'#SBATCH --signal=TERM@{self.signal_time}')
        lines.append(f'#SBATCH --output="{self.project.job_name}.o.%j"')
        lines.append(f'#SBATCH --error="{self.project.job_name}.e.%j"')
        lines.append('module load psi-python36/4.4.0')
        lines.append('module load gcc/4.8.5')
        lines.append('module load openmpi/3.1.3')
        lines.append('source activate pmsco')
        lines.append(f'cd "{self.job_dir}"')
        lines.append(f'mpirun python pmsco/pmsco -r {self.run_file.name}')
        lines.append(f'cd "{self.job_dir}"')
        lines.append('rm -rf pmsco')
        lines.append('exit 0')

        self.job_file.write_text("\n".join(lines))
        self.job_file.chmod(0o755)
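
adapting the schedule to another slurm site only requires a job script; a minimal hedged sketch (the scheduler options are site-specific assumptions):

class MySiteSchedule(SlurmSchedule):
    """hypothetical site adaptation: only the job script differs."""
    def _write_job_file(self):
        script = "\n".join([
            '#!/bin/bash',
            f'#SBATCH --job-name="{self.project.job_name}"',
            f'#SBATCH --time={int(self.wall_time.total_seconds() / 60)}',
            f'cd "{self.job_dir}"',
            f'mpirun python pmsco/pmsco -r {self.run_file.name}',
        ])
        self.job_file.write_text(script)
        self.job_file.chmod(0o755)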