"""
|
|
@package pmsco.reports.results
|
|
query and filter result data for reports
|
|
|
|
@author Matthias Muntwiler, matthias.muntwiler@psi.ch
|
|
|
|
@copyright (c) 2021 by Paul Scherrer Institut @n
|
|
Licensed under the Apache License, Version 2.0 (the "License"); @n
|
|
you may not use this file except in compliance with the License.
|
|
You may obtain a copy of the License at
|
|
http://www.apache.org/licenses/LICENSE-2.0
|
|
"""
|
|
|
|
import logging

import numpy as np

import pmsco.database.orm as db_orm
import pmsco.database.query as db_query
import pmsco.database.util as db_util
from pmsco.project import ModelSpace

logger = logging.getLogger(__name__)


def array_remove_columns(a, cols):
    """
    return a copy of a structured array with some columns removed.

    @param a: numpy structured array
    @param cols: sequence of column names to be removed
    @return: new array
    """
    # a.dtype.descr lists the (name, format) tuples of the structured data type
    dtb = [dt for dt in a.dtype.descr if dt[0] not in cols]
    b = np.empty(a.shape, dtype=dtb)
    for col in b.dtype.names:
        b[col] = a[col]
    return b


def array_range(a):
    """
    determine a default range from actual values.

    @param a: (numpy.ndarray) 1-dimensional structured array of parameter values.
    @return: range_min, range_max are dictionaries of the minimum and maximum values of each parameter.
    """
    names = db_util.regular_params(a.dtype.names)
    range_min = {}
    range_max = {}
    for name in names:
        range_min[name] = a[name].min()
        range_max[name] = a[name].max()
    return range_min, range_max

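# Usage sketch of the two helpers above (added for illustration, not part of the
# original code). It assumes that pmsco.database.util.regular_params() drops the
# underscore-prefixed control fields such as '_gen' and '_rfac':
#
#     a = np.array([(1, 0.2, 0.5), (2, 0.3, 0.7)],
#                  dtype=[('_gen', int), ('dx', float), ('_rfac', float)])
#     b = array_remove_columns(a, ('_rfac',))     # fields of b: '_gen', 'dx'
#     lo, hi = array_range(a)                     # e.g. lo == {'dx': 0.2}, hi == {'dx': 0.3}

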
class ResultData(object):
    """
    data structure for results

    the data is stored in the values and deltas arrays.
    the arrays are numpy structured arrays
    and contain a flat list of results (Result table of the database)
    along with parameter and system variables
    in the format returned by pmsco.database.query.query_model_results_array().

    values and deltas must have the same data type (same fields).
    deltas can be None if they are not available.

    the other attributes serve two purposes:
    some of them define filter rules that are applied by some of the data loading methods or the apply_filters() method.
    after loading, the update_collections() method updates them to describe the actually loaded data.

    @attention if you want to reuse the same instance for multiple loads,
    check or reset the filter attributes before each loading.
    """

    ## @var generations
    # sequence of generation numbers loaded
    #
    # on loading, data is filtered by the generation numbers in this sequence (`in` operator, see the apply_filters() method).
    # by default (None), all generations are loaded.
    # after loading, the sequence contains the loaded generation numbers (update_collections() method).

    ## @var particles
    # sequence of particle numbers loaded
    #
    # on loading, data is filtered by the particle numbers in this sequence (`in` operator, see the apply_filters() method).
    # by default (None), all particles are loaded.
    # after loading, the sequence contains the loaded particle numbers (update_collections() method).

    ## @var levels
    # dictionary of level indices loaded
    #
    # the dictionary is organized by task level.
    # allowed keys are: 'scan', 'domain', 'emit' (one or more).
    # the values are sequences of index numbers.
    #
    # on loading, data is filtered by level numbers (`in` operator, see the apply_filters() method).
    # by default (None), all levels are loaded.
    # after loading, the sequences contain the loaded indices (update_collections() method).

    ## @var filters
    # extra database filters
    #
    # filter expressions that are not covered by the generations, particles and levels attributes can be entered here.
    # this must be a sequence of sqlalchemy expressions that are passed to the Query.filter method.
    # these filters take effect in load_from_db() only.

    ## @var order
    # sort order of the database query
    #
    # this must be a sequence of pmsco.database.orm.Model and pmsco.database.orm.Result attributes.
    # this sort order takes effect in load_from_db() only.

    ## @var model_space
    # parameter range
    #
    # pmsco.project.ModelSpace object, only min and max are used.
    #
    # @attention this may be a reference to the project's model space object rather than an independent copy.
    # do not modify the object directly! instead, copy the old one and modify the copy!

    ## @var values
    # value data

    ## @var deltas
    # delta data (if loaded)

    def __init__(self):
        self.generations = None
        self.params = None
        self.particles = None
        self.levels = None
        self.filters = None
        self.order = None
        self.values = None
        self.deltas = None
        self.model_space = None

    def load_any(self, values, deltas=None):
        """
        load data of any accepted type.

        the method tries to detect the type of input data and calls the specialized load method.
        the data can be one of the types accepted by
        load_from_population(), load_from_arrays(), load_from_text(), or load_from_db().

        @param values: value data
        @param deltas: delta data (optional)
        @return: None
        """
        if isinstance(values, np.ndarray):
            self.load_from_arrays(values, deltas)
        elif hasattr(values, 'pos') and hasattr(values, 'vel'):
            self.load_from_population(values)
        elif hasattr(values, 'query'):
            self.load_from_db(values)
        else:
            self.load_from_text(values, deltas)

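    # Dispatch sketch (added for illustration; the file name, population and
    # session variables are hypothetical): depending on its type, the argument is
    # routed to one of the specialized load methods.
    #
    #     rd = ResultData()
    #     rd.load_any(values_array)            # numpy array -> load_from_arrays()
    #     rd.load_any(population)              # has .pos/.vel -> load_from_population()
    #     rd.load_any(session)                 # has .query -> load_from_db()
    #     rd.load_any("pmsco_job.tasks.dat")   # anything else -> load_from_text()
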
    def load_from_population(self, pop):
        """
        load data from a population object.

        the object should be of pmsco.optimizer.population.Population type
        or have the same pos, vel, results, generation, model_min and model_max attributes.

        loaded data is filtered by generations, particles, and/or levels by the apply_filters() method.

        @param pop: Population-like object
        @return: None
        """
        # the _rfac field of pop.pos is undefined
        pos = np.copy(pop.pos)
        pos['_rfac'] = np.nan
        self.values = np.concatenate([pop.results, pos])
        self.deltas = np.copy(pop.vel)
        self.apply_filters()
        self.update_collections()
        self.generations = np.array((pop.generation,))
        self.model_space = ModelSpace()
        self.model_space.min = pop.model_min
        self.model_space.max = pop.model_max

    def load_from_arrays(self, values, deltas=None):
        """
        load data from numpy arrays.

        the data type must be the same as used by pmsco.optimizer.population.Population.

        loaded data is filtered by generations, particles, and/or levels by the apply_filters() method.

        @param values: numpy structured array of result values.
        @param deltas: numpy structured array of deltas (optional).
        @return: None
        """
        self.values = values
        self.deltas = deltas
        self.apply_filters()
        self.update_collections()

    def load_from_text(self, values_file, deltas_file=None):
        """
        load data from a results file (.dat or .tasks.dat).

        loaded data is filtered by generations, particles, and/or levels by the apply_filters() method.

        @param values_file: path-like or open file
        @param deltas_file: path-like or open file
        @return: None
        @raise OSError if the file can't be loaded.
        """
        self.values = np.atleast_1d(np.genfromtxt(values_file, names=True))
        if deltas_file is not None:
            self.deltas = np.atleast_1d(np.genfromtxt(deltas_file, names=True))
        self.apply_filters()
        self.update_collections()

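    # Illustration (hypothetical file names): a results file and the corresponding
    # deltas file can be loaded together, optionally restricted to selected
    # generations before the arrays are stored.
    #
    #     rd = ResultData()
    #     rd.generations = range(10, 20)
    #     rd.load_from_text("myjob.tasks.dat", "myjob.tasks.deltas.dat")
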
    def load_from_db(self, session, jobs=-1, include_params=True):
        """
        load data from the database.

        data is filtered on the SQL level by self.generations, self.particles, self.levels and self.filters.
        data is ordered on the SQL level by self.order,
        which defaults to generation, particle, scan, domain, emit.

        @param session: database session, from pmsco.database.access.DatabaseAccess.session().
        @param jobs: filter by job.
        the argument can be a singleton or sequence of orm.Job objects or numeric ids.
        if None, results from all jobs are loaded.
        if -1 (default), results from the most recent job (by datetime field) are loaded.
        @param include_params: include parameter values of each model in the result (True, default).
        if you're just interested in the R-factor, set this to False and parameter values are not retrieved.
        @return: None
        """
        if jobs == -1:
            jobs = db_query.query_newest_job(session)

        # work on a copy so that the conditions added below do not accumulate in self.filters
        filters = list(self.filters) if self.filters is not None else []
        if self.generations is not None:
            filters.append(db_orm.Model.gen.in_(self.generations))
        if self.particles is not None:
            filters.append(db_orm.Model.particle.in_(self.particles))
        if self.levels is not None:
            for k, v in self.levels.items():
                if k[0] == '_':
                    k = k[1:]
                if hasattr(type(v), '__iter__'):
                    filters.append(getattr(db_orm.Result, k).in_(v))
                else:
                    filters.append(getattr(db_orm.Result, k) == v)

        if self.order is None:
            self.order = [db_orm.Model.gen, db_orm.Model.particle,
                          db_orm.Result.scan, db_orm.Result.domain, db_orm.Result.emit]

        hook_data = {'filters': filters}
        self.values, self.deltas = db_query.query_model_results_array(session,
                                                                      jobs=jobs,
                                                                      query_hook=self._filters_hook,
                                                                      hook_data=hook_data,
                                                                      include_params=include_params,
                                                                      order=self.order)

        self.update_collections()

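    # Query sketch (hypothetical session variable, assumed to come from
    # pmsco.database.access.DatabaseAccess.session()): restrict the query to the
    # first scan and to late generations, and sort by generation and particle.
    #
    #     rd = ResultData()
    #     rd.levels = {'scan': [0]}
    #     rd.filters = [db_orm.Model.gen >= 10]
    #     rd.order = [db_orm.Model.gen, db_orm.Model.particle]
    #     rd.load_from_db(session, jobs=None)      # None = all jobs, -1 = newest job
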
    @staticmethod
    def _filters_hook(query, filters):
        """
        hook function used in ResultData.load_from_db.

        the function adds a sequence of conditions to a database query.

        @param query: sqlalchemy query object
        @param filters: sequence of filter expressions to be passed to query.filter.
        example: db_orm.Model.gen.in_([1, 2, 3])
        @return: modified query
        """
        for f in filters:
            query = query.filter(f)
        return query

    def load_from_project(self, project):
        """
        define the model space from the project.

        @note we copy the reference. the object must not be modified!

        @param project: project object that provides the model_space attribute.
        @return: None
        """
        self.model_space = project.model_space

    def reset_filters(self):
        """
        reset all filter attributes to default values.

        this function resets all instance attributes that modify the query statement to their default values,
        in particular: generations, params, particles, levels, filters and order.
        it does not affect the values and deltas arrays.

        @return: None
        """
        self.generations = None
        self.params = None
        self.particles = None
        self.levels = None
        self.filters = None
        self.order = None

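    # Reuse sketch (see the @attention note in the class docstring; session and
    # generation number are hypothetical): reset the filter attributes between
    # loads so that the collections updated by the previous load do not restrict
    # the next one.
    #
    #     rd.load_from_db(session)
    #     # ... evaluate rd.values ...
    #     rd.reset_filters()
    #     rd.generations = [42]
    #     rd.load_from_db(session)
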
    def apply_filters(self):
        """
        apply generation, particle and level filters to the loaded arrays.

        this method acts on the loaded values and deltas arrays and replaces them with views of the original arrays.

        @return: None
        """
        filters = {}
        if self.generations is not None:
            filters['_gen'] = list(self.generations)
        if self.particles is not None:
            filters['_particle'] = list(self.particles)
        if self.levels is not None:
            for k, v in self.levels.items():
                if k[0] != '_':
                    k = '_' + k
                if k in self.values.dtype.names:
                    filters[k] = list(v)

        for k, v in filters.items():
            idx = np.where(np.isin(self.values[k], v))
            self.values = self.values[idx]
            if self.deltas is not None:
                idx = np.where(np.isin(self.deltas[k], v))
                self.deltas = self.deltas[idx]

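    # Sketch (illustrative only): apply_filters() can also narrow down data that
    # has already been loaded. It replaces the values and deltas attributes, so the
    # discarded rows are no longer available afterwards.
    #
    #     rd.particles = [0, 1, 2]
    #     rd.apply_filters()
    #     rd.update_collections()
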
    def update_collections(self):
        """
        update attributes that depend on the values and deltas arrays,
        namely: params, generations, particles, model_space.

        this method is called by the load methods after loading data.

        @return: None
        """
        self.params = db_util.regular_params(self.values.dtype.names)

        try:
            self.generations = np.unique(self.values['_gen'])
        except (KeyError, ValueError):
            pass

        try:
            self.particles = np.unique(self.values['_particle'])
        except (KeyError, ValueError):
            pass

        self.model_space = ModelSpace()
        self.model_space.min, self.model_space.max = array_range(self.values)

    def debug_log(self):
        """
        write a summary of the loaded data to the logger at debug level.

        @return: None
        """
        logger.debug(f"params = {self.params}")
        logger.debug(f"generations = {self.generations}")
        logger.debug(f"particles = {self.particles}")
        logger.debug(f"levels = {self.levels}")
        logger.debug(f"model_space.min = {self.model_space.min}")
        logger.debug(f"model_space.max = {self.model_space.max}")
        logger.debug(f"values.shape = {self.values.shape}")
        logger.debug(f"values.dtype = {self.values.dtype}")

    def set_model_space(self, model_space):
        """
        set the model space (parameter value range).

        @note the model space is updated by the load methods and update_collections().

        @param model_space: model space can be a pmsco.project.ModelSpace object,
        any object that contains the same min and max attributes as pmsco.project.ModelSpace,
        or a dictionary with two keys 'min' and 'max' that provides the corresponding ModelSpace dictionaries.
        @return: None
        """
        if isinstance(model_space, ModelSpace):
            self.model_space = model_space
        else:
            self.model_space = ModelSpace()
            try:
                self.model_space.min, self.model_space.max = model_space.min, model_space.max
            except AttributeError:
                self.model_space.min, self.model_space.max = model_space['min'], model_space['max']

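    # Sketch (hypothetical parameter names and values): the dictionary form mirrors
    # the min and max dictionaries of pmsco.project.ModelSpace.
    #
    #     rd.set_model_space({'min': {'dx': -0.5, 'dz': 0.0},
    #                         'max': {'dx': 0.5, 'dz': 2.0}})
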
    def non_degenerate_params(self):
        """
        get the names of non-degenerate parameters.

        the result set contains the names of all parameters
        where the upper range limit of the model space is strictly greater than the lower.

        the result is based on the params and model_space attributes.

        @return: set of strings (not ordered).
        """
        pn = set(self.params)
        rmn = set(self.model_space.min.keys())
        rmx = set(self.model_space.max.keys())
        names = pn.intersection(rmn).intersection(rmx)
        names = {name for name in names if self.model_space.max[name] > self.model_space.min[name]}
        return names

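    # Illustration (hypothetical parameter names and values): parameters whose
    # range collapses to a single value are dropped from the returned set.
    #
    #     rd.set_model_space({'min': {'dx': -0.5, 'dz': 1.0},
    #                         'max': {'dx': 0.5, 'dz': 1.0}})
    #     rd.non_degenerate_params()     # -> {'dx'} if 'dx' is in rd.params
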
    def iterate_generations(self):
        """
        iterate over generations.

        this is a generator function.
        it yields a @ref ResultData object for each generation,
        where the data is filtered by generation.

        the @ref ResultData object is a shallow copy of `self`.
        the attributes are references to the original objects.
        the `values` and `deltas` are views of the original arrays
        showing just the elements belonging to one generation.
        `generations` contains just one element: the generation number.

        @return: one @ref ResultData for each generation
        """
        for gen in self.generations:
            rd = ResultData()
            rd.generations = (gen,)
            rd.params = self.params
            rd.particles = self.particles
            rd.levels = self.levels
            idx = np.where(self.values['_gen'] == gen)
            rd.values = self.values[idx]
            if self.deltas is not None:
                idx = np.where(self.deltas['_gen'] == gen)
                rd.deltas = self.deltas[idx]
            rd.model_space = self.model_space
            yield rd
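

if __name__ == '__main__':
    # Minimal demo added for illustration (not part of the original module): load a
    # results file given on the command line and print a per-generation summary.
    # It assumes that the file is in the format accepted by load_from_text() and
    # contains the '_gen' and '_rfac' columns.
    import sys

    logging.basicConfig(level=logging.DEBUG)
    if len(sys.argv) > 1:
        demo = ResultData()
        demo.load_from_text(sys.argv[1])
        demo.debug_log()
        for gen_data in demo.iterate_generations():
            best = np.nanmin(gen_data.values['_rfac'])
            print(f"generation {gen_data.generations[0]}: best R-factor = {best:.4f}")
    else:
        print("usage: python -m pmsco.reports.results <results-file>")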