public release 3.0.0 - see README and CHANGES for details
This commit is contained in:
592
pmsco/project.py
592
pmsco/project.py
@ -19,36 +19,32 @@ the ModelSpace and CalculatorParams classes are typically used unchanged.
|
||||
|
||||
@author Matthias Muntwiler, matthias.muntwiler@psi.ch
|
||||
|
||||
@copyright (c) 2015 by Paul Scherrer Institut @n
|
||||
@copyright (c) 2015-21 by Paul Scherrer Institut @n
|
||||
Licensed under the Apache License, Version 2.0 (the "License"); @n
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
"""
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import collections
|
||||
import copy
|
||||
import datetime
|
||||
import git
|
||||
import logging
|
||||
import numpy as np
|
||||
import os.path
|
||||
from pathlib import Path
|
||||
import socket
|
||||
import sys
|
||||
|
||||
from pmsco.calculators.calculator import InternalAtomicCalculator
|
||||
from pmsco.calculators.edac import EdacCalculator
|
||||
import pmsco.cluster as mc
|
||||
import pmsco.cluster
|
||||
import pmsco.config as config
|
||||
from pmsco.compat import open
|
||||
import pmsco.data as md
|
||||
import pmsco.database as database
|
||||
import pmsco.dispatch as dispatch
|
||||
import pmsco.files as files
|
||||
import pmsco.handlers as handlers
|
||||
import pmsco.database
|
||||
import pmsco.dispatch
|
||||
import pmsco.files
|
||||
import pmsco.handlers
|
||||
from pmsco.helpers import BraceMessage as BMsg
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
@ -157,6 +153,34 @@ class ModelSpace(object):
|
||||
"""
|
||||
return ParamSpace(self.start[name], self.min[name], self.max[name], self.step[name])
|
||||
|
||||
def set_param_dict(self, d):
    """
    replace the contents of the model space by the parameters given in a dictionary.

    the object is re-initialized first, i.e., previously declared parameters are discarded.
    then, each item of the dictionary is passed to add_param().

    @param d: two-level dictionary.
        the keys of the top level are the parameter names.
        each value is itself a dictionary of space descriptors
        ('start', 'min', 'max', 'step' and 'width')
        which is expanded into keyword arguments of add_param().
        see add_param() for possible combinations.
    @return: None
    """
    # start from a blank state so that stale parameters do not survive
    self.__init__()
    for param_name, descriptors in d.items():
        self.add_param(param_name, **descriptors)
|
||||
|
||||
def get_param_dict(self):
    """
    return model space parameters in dictionary form

    the top level are parameter names,
    the second level the space descriptors 'start', 'min', 'max' and 'step'.

    the second level is a proper dictionary keyed by descriptor name
    (not an unordered set of values),
    so that the values remain distinguishable even if some of them are equal.

    @return: dict of dict.
        the parameter names are taken from the keys of self.start.

    @raise KeyError if a parameter of self.start is missing in self.min, self.max or self.step.
    """
    return {
        name: {
            'start': self.start[name],
            'min': self.min[name],
            'max': self.max[name],
            'step': self.step[name],
        }
        for name in self.start
    }
|
||||
|
||||
|
||||
class CalculatorParams(object):
|
||||
"""
|
||||
@ -568,9 +592,166 @@ class Scan(object):
|
||||
self.raw_data[dim] = grid[i].reshape(-1)
|
||||
self.raw_data['i'] = 1
|
||||
|
||||
def load(self, dirs=None):
    """
    return the scan object itself.

    the method has the same signature as the load methods of the scan specification classes
    (which accept a dirs argument),
    so that callers can call load(dirs=...) on any kind of scan object without checking its class.

    @param dirs: accepted for interface compatibility and ignored.

    @return self
    """
    return self
|
||||
|
||||
|
||||
class ScanKey(config.ConfigurableObject):
    """
    create a Scan object based on a project-supplied dictionary

    this class can be used in a run file to create a scan object based on the scan_dict attribute of the project.
    this may be convenient if your project should selectively use scans out of a long list of data files
    and you don't want to clutter up the run file with parameters that don't change.

    to do so, set the key property to match an item of scan_dict.
    the load method will look up the corresponding scan_dict item and construct the final Scan object.
    """

    def __init__(self, project=None):
        super().__init__()
        # key of the scan_dict item to load
        self.key = ""
        # project object that provides the scan_dict attribute
        self.project = project

    def load(self, dirs=None):
        """
        load the selected scan as specified in the project's scan dictionary

        the method uses ScanLoader or ScanCreator as an intermediate.
        ScanCreator is used if the scan specification contains a 'positions' item,
        ScanLoader otherwise.
        all items of the scan specification are copied to attributes of the intermediate object.

        @param dirs: passed on to the load method of the intermediate object.

        @return a new Scan object which contains the loaded data.

        @raise KeyError if self.key is not a key of a dictionary-type scan_dict.
        """
        scan_spec = self.project.scan_dict[self.key]
        # scan_spec is a mapping (see the items() loop below).
        # the presence of 'positions' must be checked by membership.
        # hasattr() would look for an attribute of the mapping object and never find the item.
        if 'positions' in scan_spec:
            loader = ScanCreator()
        else:
            loader = ScanLoader()
        for k, v in scan_spec.items():
            setattr(loader, k, v)
        scan = loader.load(dirs=dirs)
        return scan
|
||||
|
||||
|
||||
class ScanLoader(config.ConfigurableObject):
    """
    specification of a scan that is read from an experimental data file

    an object of this class can be declared in a run file in place of a Scan object.
    it holds only the properties documented below.
    the data file is read when the load() method is called,
    which happens when the project is run.
    """

    ## @var filename (string)
    # name of the file which contains the scan data.
    # a format specifier like {project} can be used to include the base path.

    ## @var emitter (string)
    # chemical symbol of the photo-emitting atoms,
    # optionally followed by a further specification (chemical state, environment, ...).
    # the string should always start with a chemical element symbol.
    # apart from that, its interpretation is up to the project and its cluster generator.
    #
    # examples: 'Ca' (calcium), 'CA' (carbon A), 'C a' (carbon a), 'C 1' (carbon one), 'N=O', 'FeIII'.

    ## @var initial_state (string)
    # nl term of the initial state
    #
    # in the form expected by EDAC, for example: '2p1/2'

    ## @var is_modf (bool)
    # declares whether the data file contains the modulation function rather than intensity values
    #
    # if false, the project will calculate a modulation function from the raw data

    def __init__(self):
        super().__init__()
        self.filename = ""
        self.emitter = ""
        self.initial_state = "1s"
        self.is_modf = False

    def load(self, dirs=None):
        """
        read the data file and return the resulting scan

        a new Scan object is created and filled by calling Scan.import_scan_file()
        with the resolved file name, the emitter and the initial state.
        if is_modf is set, the raw data are also assigned to the modulation attribute of the scan.

        @param dirs: passed to config.resolve_path() for resolving self.filename.

        @return a new Scan object which contains the loaded data file.
        """
        loaded = Scan()
        resolved_name = config.resolve_path(self.filename, dirs)
        loaded.import_scan_file(resolved_name, self.emitter, self.initial_state)
        if self.is_modf:
            # the file already holds the modulation function
            loaded.modulation = loaded.raw_data
        return loaded
|
||||
|
||||
|
||||
class ScanCreator(config.ConfigurableObject):
    """
    create a Scan object from string expressions

    this class can be used in a run file to create a scan object from python expressions,
    such as lists, ranges or numpy functions.
    to do so, fill the properties with values as documented.
    the load() method is called when the project is run.

    @note the raw_data property of the scan cannot be filled this way.
    thus, the class is useful in `single` calculation mode only.
    """

    ## @var filename (string)
    # name of the file which should receive the scan data.
    # the file name can contain a format specifier like {project} to include the base path.

    ## @var positions (dict)
    # dictionary specifying the scan positions
    #
    # the dictionary must contain four keys: 'e', 't', 'p', 'a' representing the four scan axes.
    # each key holds a string that contains a python expression.
    # the string is evaluated using python's built-in eval() function.
    # the expression must evaluate to an iterable object or numpy ndarray of the scan positions.
    # the `np` namespace can be used to access numpy functions.
    # values that are not strings (e.g. numbers, lists or arrays) are used as they are.
    #
    # example:
    # the following dictionary generates a hemispherical scan
    # self.positions = {'e': '100', 't': 'np.linspace(0, 90, 91)', 'p': 'range(0, 360, 2)', 'a': '0'}

    ## @var emitter (string)
    # chemical symbol and, optionally following, further specification (chemical state, environment, ...)
    # of photo-emitting atoms.
    # the interpretation of this string is up to the project and its cluster generator.
    # it should, however, always start with a chemical element symbol.
    #
    # examples: 'Ca' (calcium), 'CA' (carbon A), 'C a' (carbon a), 'C 1' (carbon one), 'N=O', 'FeIII'.

    ## @var initial_state (string)
    # nl term of initial state
    #
    # in the form expected by EDAC, for example: '2p1/2'

    def __init__(self):
        super().__init__()
        self.filename = ""
        self.positions = {'e': None, 't': None, 'p': None, 'a': None}
        self.emitter = ""
        self.initial_state = "1s"

    def load(self, dirs=None):
        """
        create the scan according to specification

        each item of self.positions is converted to a numpy array of at least one dimension.
        string items are evaluated as python expressions, other items are converted directly.
        the arrays are passed to Scan.define_scan() together with the emitter and initial state.
        the resolved file name is stored in the filename attribute of the scan.

        @param dirs: passed to config.resolve_path() for resolving self.filename.

        @return a new Scan object which contains the created scan array.

        @raise TypeError if one of the scan axes is undefined (None).
        """
        scan = Scan()
        positions = {}
        for axis in self.positions.keys():
            spec = self.positions[axis]
            if spec is None:
                # eval(None) would raise a TypeError with an unhelpful message
                raise TypeError(f"scan positions of axis '{axis}' are undefined")
            if isinstance(spec, (str, bytes)):
                # NOTE(security): eval executes arbitrary python code.
                # the expressions must come from a trusted source (the user's own run file or project code).
                value = eval(spec)
            else:
                # the value is already a number or sequence, e.g. if assigned by project code
                value = spec
            positions[axis] = np.atleast_1d(np.asarray(value))
        scan.define_scan(positions, self.emitter, self.initial_state)
        scan.filename = config.resolve_path(self.filename, dirs)
        return scan
|
||||
|
||||
|
||||
# noinspection PyMethodMayBeStatic
|
||||
class Project(object):
|
||||
class Project(config.ConfigurableObject):
|
||||
"""
|
||||
base class of a calculation project.
|
||||
|
||||
@ -609,17 +790,18 @@ class Project(object):
|
||||
#
|
||||
|
||||
## @var scans (list of Scan objects)
|
||||
# list of experimental or scan files for which calculations are to be run.
|
||||
# list of experimental scans for which calculations are to be run.
|
||||
#
|
||||
# the list must be populated by calling the add_scan() method.
|
||||
# this should be done in the create_project() function, or through the command line arguments.
|
||||
# during project initialization, this list must be populated with Scan, ScanLoader or ScanCreator objects.
|
||||
# while Scan objects contain all scan data, the latter two classes contain only scan specifications
|
||||
# which are expanded (i.e. files are loaded or arrays are calculated) just before the calculations start.
|
||||
# the Project.add_scan() method is a short-cut to create the respective scan object from few arguments.
|
||||
# before the calculation starts, all objects are converted into fully specified Scan objects
|
||||
# and scan data is loaded or calculated.
|
||||
#
|
||||
# the modulation function is calculated internally.
|
||||
# if your scan files contain the modulation function (as opposed to intensity),
|
||||
# you must add the files in the create_project() function.
|
||||
# the command line does not support loading modulation functions.
|
||||
#
|
||||
# @c scans must be considered read-only. use project methods to change it.
|
||||
# there are two ways to fill this list:
|
||||
# either the project code fills it as a part of its initialization (create_project),
|
||||
# or the list is populated via the run-file.
|
||||
|
||||
## @var domains (list of arbitrary objects)
|
||||
# list of domains for which calculations are to be run.
|
||||
@ -661,28 +843,22 @@ class Project(object):
|
||||
# set this argument to False only if the calculation is a continuation of a previous one
|
||||
# without any changes to the code.
|
||||
|
||||
## @var data_dir
|
||||
# directory path to experimental data.
|
||||
## @var directories
|
||||
# dictionary for various directory paths.
|
||||
#
|
||||
# the project should load experimental data (scan files) from this path.
|
||||
# this attribute receives the --data-dir argument from the command line
|
||||
# if the project parses the common arguments (pmsco.set_common_args).
|
||||
#
|
||||
# it is up to the project to define where to load scan files from.
|
||||
# if the location of the files may depend on the machine or user account,
|
||||
# the user may want to specify the data path on the command line.
|
||||
|
||||
## @var output_dir (string)
|
||||
# directory path for data files produced during the calculation, including intermediate files.
|
||||
# home: user's home directory.
# work: current working directory at the time when the project object is created.
|
||||
# data: where to load experimental data (scan files) from.
|
||||
# project: directory of the project module.
|
||||
# output: where to write output and intermediate files.
|
||||
# temp: for temporary files.
|
||||
#
|
||||
# output_dir and output_file are set at once by @ref set_output.
|
||||
|
||||
## @var output_file (string)
|
||||
## @var output_file (Path)
|
||||
# file name root for data files produced during the calculation, including intermediate files.
|
||||
#
|
||||
# the file name should include the path. the path must also be set in @ref output_dir.
|
||||
#
|
||||
# output_dir and output_file are set at once by @ref set_output.
|
||||
# this is the concatenation of self.directories['output'] and self.job_name.
|
||||
# assignment to this property will update the two basic attributes.
|
||||
|
||||
## @var db_file (string)
|
||||
# name of an sqlite3 database file where the calculation results should be stored.
|
||||
@ -694,14 +870,17 @@ class Project(object):
|
||||
#
|
||||
# the actual wall time may be longer by the remaining time of running calculations.
|
||||
# running calculations will not be aborted.
|
||||
#
|
||||
# the time_limit property is an alternative representation as hours.
|
||||
# reading and writing accesses timedelta_limit.
|
||||
|
||||
## @var combined_scan
|
||||
# combined raw data from scans.
|
||||
# updated by add_scan().
|
||||
# updated by self.load_scans().
|
||||
|
||||
## @var combined_modf
|
||||
# combined modulation function from scans.
|
||||
# updated by add_scan().
|
||||
# updated by self.load_scans().
|
||||
|
||||
## @var files
|
||||
# list of all generated data files with metadata.
|
||||
@ -741,14 +920,17 @@ class Project(object):
|
||||
#
|
||||
|
||||
def __init__(self):
|
||||
super().__init__()
|
||||
self._module = None
|
||||
self.mode = "single"
|
||||
self.job_name = ""
|
||||
self.job_name = "pmsco0"
|
||||
self.job_tags = {}
|
||||
self.git_hash = ""
|
||||
self.description = ""
|
||||
self.features = {}
|
||||
self.cluster_format = mc.FMT_EDAC
|
||||
self.cluster_generator = mc.LegacyClusterGenerator(self)
|
||||
self.cluster_format = pmsco.cluster.FMT_EDAC
|
||||
self.cluster_generator = pmsco.cluster.LegacyClusterGenerator(self)
|
||||
self._model_space = None
|
||||
self.scans = []
|
||||
self.domains = []
|
||||
self.optimizer_params = {
|
||||
@ -758,39 +940,170 @@ class Project(object):
|
||||
'recalc_seed': True,
|
||||
'table_file': ""
|
||||
}
|
||||
self.data_dir = ""
|
||||
self.output_dir = ""
|
||||
self.output_file = "pmsco_data"
|
||||
self.directories = {
|
||||
"home": Path.home(),
|
||||
"work": Path.cwd(),
|
||||
"data": "",
|
||||
"project": "",
|
||||
"output": "",
|
||||
"temp": ""}
|
||||
self.log_file = ""
|
||||
self.log_level = "WARNING"
|
||||
self.db_file = ':memory:'
|
||||
self.timedelta_limit = datetime.timedelta(days=1)
|
||||
self.combined_scan = None
|
||||
self.combined_modf = None
|
||||
self.files = files.FileTracker()
|
||||
self.files = pmsco.files.FileTracker()
|
||||
self.keep_files = list(pmsco.files.FILE_CATEGORIES_TO_KEEP)
|
||||
self.keep_levels = 1
|
||||
self.keep_best = 10
|
||||
self.handler_classes = {
|
||||
'model': handlers.SingleModelHandler,
|
||||
'scan': handlers.ScanHandler,
|
||||
'domain': handlers.DomainHandler,
|
||||
'emit': handlers.EmitterHandler,
|
||||
'region': handlers.SingleRegionHandler
|
||||
'model': pmsco.handlers.SingleModelHandler,
|
||||
'scan': pmsco.handlers.ScanHandler,
|
||||
'domain': pmsco.handlers.DomainHandler,
|
||||
'emit': pmsco.handlers.EmitterHandler,
|
||||
'region': pmsco.handlers.SingleRegionHandler
|
||||
}
|
||||
self.atomic_scattering_factory = InternalAtomicCalculator
|
||||
self.multiple_scattering_factory = EdacCalculator
|
||||
self._tasks_fields = []
|
||||
self._db = database.ResultsDatabase()
|
||||
self._db = pmsco.database.ResultsDatabase()
|
||||
|
||||
def validate(self):
    """
    check and complete the project parameters before the calculations start

    the method checks attributes that may cause trouble or go unnoticed if they are wrong,
    and it completes attributes which may be incomplete after loading a run-file.
    failed critical checks raise an exception (AssertionError, AttributeError, KeyError, ValueError).
    checks that cause an attribute to revert to default are logged as warning.

    the following attributes are fixed:
    - scattering factories that are declared as string are looked up in the project module.
    - place holders in the directories attribute are resolved.
    - place holders in the output_file attribute are resolved.
    - output_file and output_dir are made consistent (so that output_file includes output_dir).
    - the create_model_space() method is called if the model_space attribute is undefined or empty
      (this case is logged as a warning).
    - scan data are loaded.

    @note to check the syntax of a run-file, set the calculation mode to 'validate' and run pmsco.
    this will pass the validate method but will stop execution before calculations are started.

    @raise AssertionError if a parameter is not correct.
    @raise AttributeError if a class name cannot be resolved.
    """
    known_modes = {"single", "swarm", "genetic", "grid", "table", "test", "validate"}
    assert self.mode in known_modes

    # factories given by name are replaced by the corresponding attribute of the project module
    for factory_attr in ("atomic_scattering_factory", "multiple_scattering_factory"):
        factory = getattr(self, factory_attr)
        if isinstance(factory, str):
            setattr(self, factory_attr, getattr(self._module, factory))

    # all paths are resolved against the unresolved dictionary before it is replaced as a whole
    resolved_dirs = {}
    for dir_key, dir_path in self.directories.items():
        resolved_dirs[dir_key] = config.resolve_path(Path(dir_path), self.directories)
    self.directories = resolved_dirs

    assert len(str(self.output_file))
    out_dir = config.resolve_path(self.directories['output'], self.directories)
    out_file = config.resolve_path(self.output_file, self.directories)
    self.output_file = Path(out_dir, out_file)
    self.directories['output'] = self.output_file.parent

    if self._model_space is None or not self._model_space.start:
        logger.warning("undefined model_space attribute, trying project's create_model_space")
        self._model_space = self.create_model_space()

    self.load_scans()
|
||||
|
||||
@property
def data_dir(self):
    """
    the 'data' item of the directories attribute.

    assigned values are converted to a Path object.
    """
    dirs = self.directories
    return dirs['data']

@data_dir.setter
def data_dir(self, path):
    data_path = Path(path)
    self.directories['data'] = data_path
|
||||
|
||||
@property
def output_dir(self):
    """
    the 'output' item of the directories attribute.

    assigned values are converted to a Path object.
    """
    dirs = self.directories
    return dirs['output']

@output_dir.setter
def output_dir(self, path):
    output_path = Path(path)
    self.directories['output'] = output_path
|
||||
|
||||
@property
def output_file(self):
    """
    path and base name of the output files.

    the value is composed of the 'output' item of the directories attribute and the job_name attribute.
    """
    folder = self.directories['output']
    return Path(folder, self.job_name)

@output_file.setter
def output_file(self, filename):
    """
    set path and base name of output file.

    the directory part is copied to the 'output' item of the directories attribute
    unless it is empty or the current directory ('.').
    the file stem (file name without the last suffix) is copied to the job_name attribute.

    @param filename: (PathLike)

    @raise ValueError if the file stem is empty.
        at this point, the output directory may already have been changed.
    """
    target = Path(filename)

    folder = target.parent
    if str(folder) not in ("", "."):
        self.directories['output'] = folder

    stem = str(target.stem)
    if not stem:
        raise ValueError("invalid output file name")
    self.job_name = stem
|
||||
|
||||
@property
def time_limit(self):
    """
    wall time limit in hours.

    this is an alternative representation of the timedelta_limit attribute.
    reading converts timedelta_limit to hours,
    writing replaces timedelta_limit by a timedelta of the given number of hours.

    @return (float) time limit in hours.
    """
    # 3600 seconds per hour. getter and setter must use the same unit.
    return self.timedelta_limit.total_seconds() / 3600

@time_limit.setter
def time_limit(self, hours):
    self.timedelta_limit = datetime.timedelta(hours=hours)
|
||||
|
||||
def create_model_space(self):
    """
    create a project.ModelSpace object which defines the allowed range for model parameters.

    the base implementation does not define a model space and returns None.
    project classes that rely on this mechanism override the method
    and return a ModelSpace object which declares all model parameters used in the project.

    there are three ways for a project to declare the model space:
    1. implement the @ref create_model_space method.
       this is the older way and may become deprecated in a future version.
    2. assign a ModelSpace to the self.model_space property directly
       (in the @ref validate method).
    3. declare the model space in the run-file.

    the validate method calls this method only if self._model_space is undefined.

    @return ModelSpace object (None in the base implementation)
    """
    return None
|
||||
|
||||
@property
def model_space(self):
    """
    ModelSpace object that defines the allowed range for model parameters.

    the property accepts either a ModelSpace object, which is stored as it is,
    or a dictionary-like object (anything that has an items attribute),
    which is converted by ModelSpace.set_param_dict().
    other objects are rejected with a ValueError.

    there are three ways for a project to declare the model space:
    1. implement the @ref create_model_space method.
       this is the older way and may become deprecated in a future version.
    2. assign a ModelSpace to the self.model_space property directly
       (in the @ref validate method).
    3. declare the model space in the run-file.

    initially, this property is None.
    """
    return self._model_space

@model_space.setter
def model_space(self, value):
    if isinstance(value, ModelSpace):
        self._model_space = value
        return

    if not hasattr(value, 'items'):
        raise ValueError("incompatible object type")

    # the new object is assigned before it is filled, as a failing conversion must not restore the old value
    self._model_space = ModelSpace()
    self._model_space.set_param_dict(value)
|
||||
|
||||
def create_params(self, model, index):
|
||||
"""
|
||||
create a CalculatorParams object given the model parameters and calculation index.
|
||||
@ -816,11 +1129,15 @@ class Project(object):
|
||||
self.combined_scan = None
|
||||
self.combined_modf = None
|
||||
|
||||
def add_scan(self, filename, emitter, initial_state, is_modf=False, modf_model=None, positions=None):
|
||||
def add_scan(self, filename, emitter, initial_state, is_modf=False, positions=None):
|
||||
"""
|
||||
add the file name of reference experiment and load it.
|
||||
|
||||
the extension must be one of msc_data.DATATYPES (case insensitive)
|
||||
add a scan specification to the scans list.
|
||||
|
||||
this is a shortcut for adding a ScanCreator or ScanLoader object to the self.scans list.
|
||||
the creator or loader are converted into full Scan objects just before the calculation starts
|
||||
(in the self.setup() method).
|
||||
|
||||
the extension must be one of pmsco.data.DATATYPES (case insensitive)
|
||||
corresponding to the meaning of the columns in the file.
|
||||
|
||||
caution: EDAC can only calculate equidistant, rectangular scans.
|
||||
@ -831,9 +1148,6 @@ class Project(object):
|
||||
* intensity vs theta, phi, or alpha
|
||||
* intensity vs theta and phi (hemisphere or hologram scan)
|
||||
|
||||
the method calculates the modulation function if @c is_modf is @c False.
|
||||
it also updates @c combined_scan and @c combined_modf which may be used as R-factor comparison targets.
|
||||
|
||||
@param filename: (string) file name of the experimental data, possibly including a path.
|
||||
the file is not loaded when the optional positions argument is present,
|
||||
but the filename may serve as basename for output files (e.g. modulation function).
|
||||
@ -852,57 +1166,64 @@ class Project(object):
|
||||
@param is_modf: (bool) declares whether the file contains the modulation function (True),
|
||||
or intensity (False, default). In the latter case, the modulation function is calculated internally.
|
||||
|
||||
@param modf_model: (dict) model parameters to be passed to the modulation function.
|
||||
|
||||
@return (Scan) the new scan object (which is also a member of self.scans).
|
||||
"""
|
||||
scan = Scan()
|
||||
if positions is not None:
|
||||
scan.define_scan(positions, emitter, initial_state)
|
||||
scan.filename = filename
|
||||
scan = ScanCreator()
|
||||
scan.positions = positions
|
||||
else:
|
||||
scan.import_scan_file(filename, emitter, initial_state)
|
||||
scan = ScanLoader()
|
||||
scan.is_modf = is_modf
|
||||
|
||||
scan.filename = filename
|
||||
scan.emitter = emitter
|
||||
scan.initial_state = initial_state
|
||||
self.scans.append(scan)
|
||||
|
||||
if modf_model is None:
|
||||
modf_model = {}
|
||||
return scan
|
||||
|
||||
if scan.raw_data is not None:
|
||||
if is_modf:
|
||||
scan.modulation = scan.raw_data
|
||||
else:
|
||||
def load_scans(self):
|
||||
"""
|
||||
load all scan data.
|
||||
|
||||
initially, the self.scans list may contain objects of different classes (Scan, ScanLoader, ScanCreator)
|
||||
depending on the project initialization.
|
||||
this method loads all data, so that the scans list contains only Scan objects.
|
||||
|
||||
also, the self.combined_scan and self.combined_modf fields are calculated from the scans.
|
||||
"""
|
||||
has_raw_data = True
|
||||
has_mod_func = True
|
||||
loaded_scans = []
|
||||
|
||||
for idx, scan in enumerate(self.scans):
|
||||
scan = scan.load(dirs=self.directories)
|
||||
loaded_scans.append(scan)
|
||||
if scan.modulation is None:
|
||||
try:
|
||||
scan.modulation = self.calc_modulation(scan.raw_data, modf_model)
|
||||
scan.modulation = self.calc_modulation(scan.raw_data, self.model_space.start)
|
||||
except ValueError:
|
||||
logger.error("error calculating the modulation function of experimental data.")
|
||||
scan.modulation = None
|
||||
else:
|
||||
scan.modulation = None
|
||||
logger.error(f"error calculating the modulation function of scan {idx}.")
|
||||
has_raw_data = has_raw_data and scan.raw_data is not None
|
||||
has_mod_func = has_mod_func and scan.modulation is not None
|
||||
self.scans = loaded_scans
|
||||
|
||||
if scan.raw_data is not None:
|
||||
if self.combined_scan is not None:
|
||||
dt = md.common_dtype((self.combined_scan, scan.raw_data))
|
||||
d1 = md.restructure_data(self.combined_scan, dt)
|
||||
d2 = md.restructure_data(scan.raw_data, dt)
|
||||
self.combined_scan = np.hstack((d1, d2))
|
||||
else:
|
||||
self.combined_scan = scan.raw_data.copy()
|
||||
if has_raw_data:
|
||||
stack1 = [scan.raw_data for scan in self.scans]
|
||||
dtype = md.common_dtype(stack1)
|
||||
stack2 = [md.restructure_data(data, dtype) for data in stack1]
|
||||
self.combined_scan = np.hstack(tuple(stack2))
|
||||
else:
|
||||
self.combined_scan = None
|
||||
|
||||
if scan.modulation is not None:
|
||||
if self.combined_modf is not None:
|
||||
dt = md.common_dtype((self.combined_modf, scan.modulation))
|
||||
d1 = md.restructure_data(self.combined_modf, dt)
|
||||
d2 = md.restructure_data(scan.modulation, dt)
|
||||
self.combined_modf = np.hstack((d1, d2))
|
||||
else:
|
||||
self.combined_modf = scan.modulation.copy()
|
||||
if has_mod_func:
|
||||
stack1 = [scan.modulation for scan in self.scans]
|
||||
dtype = md.common_dtype(stack1)
|
||||
stack2 = [md.restructure_data(data, dtype) for data in stack1]
|
||||
self.combined_modf = np.hstack(tuple(stack2))
|
||||
else:
|
||||
self.combined_modf = None
|
||||
|
||||
return scan
|
||||
|
||||
def clear_domains(self):
|
||||
"""
|
||||
clear domains.
|
||||
@ -933,42 +1254,6 @@ class Project(object):
|
||||
"""
|
||||
self.domains.append(domain)
|
||||
|
||||
def set_output(self, filename):
    """
    set path and base name of output file.

    the complete argument is copied to the output_file attribute.
    the argument is then split at the last path separator:
    the leading part is copied to the output_dir attribute,
    the trailing part to the job_name attribute.

    if the argument does not contain a directory part, output_dir becomes an empty string.

    @param filename: (string) base name of the output files, possibly including a path.
    """
    self.output_file = filename
    self.output_dir, self.job_name = os.path.split(filename)
|
||||
|
||||
def set_timedelta_limit(self, timedelta, margin_minutes=10):
    """
    set the walltime limit with a safety margin.

    the method stores the time limit reduced by the safety margin in the self.timedelta_limit attribute.
    by default, a safety margin of 10 minutes is subtracted from the main argument
    in order to increase the probability that the process ends in time.
    a project class that wants a different margin may override the method.

    the method is typically called with the command line time limit from the main module.

    @note the safety margin could be applied at various levels.
    it is done here because it can easily be overridden by the project subclass.
    to keep run scripts simple, the command line can be given the same time limit
    as the job scheduler of the computing cluster.

    @param timedelta: (datetime.timedelta) max. duration of the calculation process (wall time).

    @param margin_minutes: (int) safety margin in minutes to subtract from timedelta.
    """
    safety_margin = datetime.timedelta(minutes=margin_minutes)
    self.timedelta_limit = timedelta - safety_margin
|
||||
|
||||
def log_project_args(self):
|
||||
"""
|
||||
send some common project attributes to the log.
|
||||
@ -981,6 +1266,14 @@ class Project(object):
|
||||
@return: None
|
||||
"""
|
||||
try:
|
||||
for key in self.directories:
|
||||
val = self.directories[key]
|
||||
lev = logging.WARNING if val else logging.DEBUG
|
||||
logger.log(lev, f"directories['{key}']: {val}")
|
||||
|
||||
logger.warning("output file: {0}".format(self.output_file))
|
||||
logger.warning("database: {0}".format(self.db_file))
|
||||
|
||||
logger.warning("atomic scattering: {0}".format(self.atomic_scattering_factory))
|
||||
logger.warning("multiple scattering: {0}".format(self.multiple_scattering_factory))
|
||||
logger.warning("optimization mode: {0}".format(self.mode))
|
||||
@ -990,15 +1283,11 @@ class Project(object):
|
||||
lev = logging.WARNING if val else logging.DEBUG
|
||||
logger.log(lev, "optimizer_params['{k}']: {v}".format(k=key, v=val))
|
||||
|
||||
logger.warning("data directory: {0}".format(self.data_dir))
|
||||
logger.warning("output file: {0}".format(self.output_file))
|
||||
logger.warning("database: {0}".format(self.db_file))
|
||||
|
||||
_files_to_keep = files.FILE_CATEGORIES - self.files.categories_to_delete
|
||||
_files_to_keep = pmsco.files.FILE_CATEGORIES - self.files.categories_to_delete
|
||||
logger.warning("intermediate files to keep: {0}".format(", ".join(_files_to_keep)))
|
||||
|
||||
for idx, scan in enumerate(self.scans):
|
||||
logger.warning(f"scan {idx}: {scan.filename} ({scan.emitter} {scan.initial_state}")
|
||||
logger.warning(f"scan {idx}: {scan.filename} ({scan.emitter} {scan.initial_state})")
|
||||
for idx, dom in enumerate(self.domains):
|
||||
logger.warning(f"domain {idx}: {dom}")
|
||||
|
||||
@ -1247,16 +1536,26 @@ class Project(object):
|
||||
"""
|
||||
self.git_hash = self.get_git_hash()
|
||||
fields = ["rfac"]
|
||||
fields.extend(dispatch.CalcID._fields)
|
||||
fields.extend(pmsco.dispatch.CalcID._fields)
|
||||
fields.append("secs")
|
||||
fields = ["_" + f for f in fields]
|
||||
mspace = self.create_model_space()
|
||||
model_fields = list(mspace.start.keys())
|
||||
model_fields = list(self.model_space.start.keys())
|
||||
model_fields.sort(key=lambda name: name.lower())
|
||||
fields.extend(model_fields)
|
||||
self._tasks_fields = fields
|
||||
|
||||
with open(self.output_file + ".tasks.dat", "w") as outfile:
|
||||
if 'all' in self.keep_files:
|
||||
cats = set([])
|
||||
else:
|
||||
cats = pmsco.files.FILE_CATEGORIES - set(self.keep_files)
|
||||
cats -= {'report'}
|
||||
if self.mode == 'single':
|
||||
cats -= {'model'}
|
||||
self.files.categories_to_delete = cats
|
||||
|
||||
Path(self.output_file).parent.mkdir(parents=True, exist_ok=True)
|
||||
tasks_file = Path(self.output_file).with_suffix(".tasks.dat")
|
||||
with open(tasks_file, "w") as outfile:
|
||||
outfile.write("# ")
|
||||
outfile.write(" ".join(fields))
|
||||
outfile.write("\n")
|
||||
@ -1311,7 +1610,8 @@ class Project(object):
|
||||
values_dict['_rfac'] = parent_task.rfac
|
||||
values_dict['_secs'] = parent_task.time.total_seconds()
|
||||
values_list = [values_dict[field] for field in self._tasks_fields]
|
||||
with open(self.output_file + ".tasks.dat", "a") as outfile:
|
||||
tasks_file = Path(self.output_file).with_suffix(".tasks.dat")
|
||||
with open(tasks_file, "a") as outfile:
|
||||
outfile.write(" ".join(format(value) for value in values_list) + "\n")
|
||||
|
||||
db_id = self._db.insert_result(parent_task.id, values_dict)
|
||||
@ -1548,11 +1848,11 @@ class Project(object):
|
||||
"""
|
||||
_files = {}
|
||||
xyz_filename = filename + ".xyz"
|
||||
cluster.save_to_file(xyz_filename, fmt=mc.FMT_XYZ)
|
||||
cluster.save_to_file(xyz_filename, fmt=pmsco.cluster.FMT_XYZ)
|
||||
_files[xyz_filename] = 'cluster'
|
||||
|
||||
xyz_filename = filename + ".emit.xyz"
|
||||
cluster.save_to_file(xyz_filename, fmt=mc.FMT_XYZ, emitters_only=True)
|
||||
cluster.save_to_file(xyz_filename, fmt=pmsco.cluster.FMT_XYZ, emitters_only=True)
|
||||
_files[xyz_filename] = 'cluster'
|
||||
|
||||
return _files
|
||||
|
Reference in New Issue
Block a user