public release 4.2.0 - see README.md and CHANGES.md for details
pmsco/reports/results.py (new file, 442 lines)
@@ -0,0 +1,442 @@
"""
@package pmsco.reports.results
query and filter result data for reports

@author Matthias Muntwiler, matthias.muntwiler@psi.ch

@copyright (c) 2021 by Paul Scherrer Institut @n
Licensed under the Apache License, Version 2.0 (the "License"); @n
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
"""

import logging
import numpy as np
import pmsco.database.orm as db_orm
import pmsco.database.query as db_query
import pmsco.database.util as db_util
from pmsco.project import ModelSpace

logger = logging.getLogger(__name__)


def array_remove_columns(a, cols):
    """
    return a copy of a structured array with some columns removed.

    @param a: numpy structured array
    @param cols: sequence of column names to be removed
    @return: new array
    """
    dtb = [dt for dt in a.dtype.descr if dt[0] not in cols]
    b = np.empty(a.shape, dtype=dtb)
    for col in b.dtype.names:
        b[col] = a[col]
    return b
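
# Example (a hedged sketch, not part of the original module): dropping a
# bookkeeping column from a small structured array. The column names are
# illustrative only.
#
#   a = np.array([(0, 1.5), (1, 2.5)], dtype=[('_gen', 'i4'), ('x', 'f4')])
#   b = array_remove_columns(a, ['_gen'])
#   # b.dtype.names == ('x',) and b['x'] contains [1.5, 2.5]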


def array_range(a):
    """
    determine a default range from actual values.

    @param a: (numpy.ndarray) 1-dimensional structured array of parameter values.
    @return: range_min, range_max are dictionaries of the minimum and maximum values of each parameter.
    """
    names = db_util.regular_params(a.dtype.names)
    range_min = {}
    range_max = {}
    for name in names:
        range_min[name] = a[name].min()
        range_max[name] = a[name].max()
    return range_min, range_max
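
# Example (a hedged sketch): given a structured array with parameter columns
# 'dist' and 'angle' (hypothetical names) next to control columns such as '_gen',
# array_range returns one dictionary of per-parameter minima and one of maxima:
#
#   a = np.array([(0, 2.0, 10.0), (1, 3.0, 30.0)],
#                dtype=[('_gen', 'i4'), ('dist', 'f4'), ('angle', 'f4')])
#   mn, mx = array_range(a)
#   # assuming db_util.regular_params() drops the underscore-prefixed names:
#   # mn == {'dist': 2.0, 'angle': 10.0}, mx == {'dist': 3.0, 'angle': 30.0}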


class ResultData(object):
    """
    data structure for results

    the data is stored in the values and deltas arrays.
    the arrays are numpy structured arrays
    and contain a flat list of results (Result table of the database)
    along with parameter and system variables
    in the format returned by pmsco.database.query.query_model_results_array().

    values and deltas must have the same data type (same fields).
    deltas can be None if they are not available.

    the other attributes serve two purposes:
    some of them define filter rules that are applied by the data loading methods or by the apply_filters() method.
    after loading, the update_collections() method updates them to describe the actually loaded data.

    @attention if you want to reuse the same instance for multiple loads,
    check or reset the filter attributes before each load.
    """

    ## @var generations
    # sequence of generation numbers loaded
    #
    # on loading, data is filtered by the generation numbers in this sequence (`in` operator, see apply_filters() method).
    # by default (None), all generations are loaded.
    # after loading, the sequence contains the loaded generation numbers (update_collections() method).

    ## @var particles
    # sequence of particle numbers loaded
    #
    # on loading, data is filtered by the particle numbers in this sequence (`in` operator, see apply_filters() method).
    # by default (None), all particles are loaded.
    # after loading, the sequence contains the loaded particle numbers (update_collections() method).

    ## @var levels
    # dictionary of level indices loaded
    #
    # the dictionary is organized by task level.
    # allowed keys are: 'scan', 'domain', 'emit' (one or more).
    # the values are sequences of index numbers.
    #
    # on loading, data is filtered by level numbers (`in` operator, see apply_filters() method).
    # by default (None), all levels are loaded.
    # after loading, the sequence contains the loaded indices (update_collections() method).

    ## @var filters
    # extra database filters
    #
    # filter expressions that are not covered by the generations, particles and levels attributes can be entered here.
    # this must be a sequence of sqlalchemy expressions that are passed to the Query.filter methods.
    # these filters take effect in load_from_db() only.

    ## @var order
    # sort order in database query
    #
    # this must be a sequence of pmsco.database.orm.Model and pmsco.database.orm.Result attributes.
    # this sort order takes effect in load_from_db() only.

    ## @var model_space
    # parameter range
    #
    # pmsco.project.ModelSpace object, only min and max are used.
    #
    # @attention this may be a reference to the project's model space object rather than an independent copy.
    # do not modify the object directly! instead, copy the old one and modify the copy!

    ## @var values
    # value data
    #

    ## @var deltas
    # delta data (if loaded)
    #

    def __init__(self):
        self.generations = None
        self.params = None
        self.particles = None
        self.levels = None
        self.filters = None
        self.order = None
        self.values = None
        self.deltas = None
        self.model_space = None

    def load_any(self, values, deltas=None):
        """
        load data of any accepted type.

        the method tries to detect the type of input data and calls the specialized load method.
        the data can be one of the types accepted by
        load_from_population(), load_from_arrays(), load_from_text(), or load_from_db().

        @param values: value data
        @param deltas: delta data (optional)
        @return: None
        """
        if isinstance(values, np.ndarray):
            self.load_from_arrays(values, deltas)
        elif hasattr(values, 'pos') and hasattr(values, 'vel'):
            self.load_from_population(values)
        elif hasattr(values, 'query'):
            self.load_from_db(values)
        else:
            self.load_from_text(values, deltas)

    def load_from_population(self, pop):
        """
        load data from a population object.

        the object should be of pmsco.optimizer.population.Population type
        or have the same pos, vel, results, generation, model_min and model_max attributes.

        loaded data is filtered by generations, particles, and/or levels by the apply_filters() method.

        @param pop: Population-like object
        @return: None
        """
        # the _rfac field of pop.pos is undefined
        pos = np.copy(pop.pos)
        pos['_rfac'] = np.nan
        self.values = np.concatenate([pop.results, pos])
        self.deltas = np.copy(pop.vel)
        self.apply_filters()
        self.update_collections()
        self.generations = np.array((pop.generation,))
        self.model_space = ModelSpace()
        self.model_space.min = pop.model_min
        self.model_space.max = pop.model_max

    def load_from_arrays(self, values, deltas=None):
        """
        load data from numpy arrays.

        data type must be the same as used by pmsco.optimizer.population.Population.

        loaded data is filtered by generations, particles, and/or levels by the apply_filters() method.

        @param values: numpy structured array of result values
        @param deltas: numpy structured array of deltas (optional)
        @return: None
        """
        self.values = values
        self.deltas = deltas
        self.apply_filters()
        self.update_collections()

    def load_from_text(self, values_file, deltas_file=None):
        """
        load data from results file (.dat or .tasks.dat)

        loaded data is filtered by generations, particles, and/or levels by the apply_filters() method.

        @param values_file: path-like or open file
        @param deltas_file: path-like or open file
        @return: None
        @raise OSError if file can't be loaded.
        """
        self.values = np.atleast_1d(np.genfromtxt(values_file, names=True))
        if deltas_file is not None:
            self.deltas = np.atleast_1d(np.genfromtxt(deltas_file, names=True))
        self.apply_filters()
        self.update_collections()
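
    # Example input (a hedged sketch): load_from_text expects whitespace-delimited
    # text with a header row of column names, as read by np.genfromtxt(names=True).
    # A values file might start like this ('dist' and 'angle' are illustrative
    # parameter names; the underscore-prefixed columns are the ones used by this module):
    #
    #   _gen _particle _rfac dist angle
    #   0    0         0.62  2.01 12.5
    #   0    1         0.55  2.10 14.0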

    def load_from_db(self, session, jobs=-1, include_params=True):
        """
        load data from the database.

        data is filtered on the SQL level by self.generations, self.particles, self.levels and self.filters.
        data is ordered on the SQL level by self.order,
        which defaults to generation, particle, scan, domain, emit.

        @param session: database session, from pmsco.database.access.DatabaseAccess.session().
        @param jobs: filter by job.
            the argument can be a single orm.Job object or numeric id, or a sequence of them.
            if None, results from all jobs are loaded.
            if -1 (default), results from the most recent job (by datetime field) are loaded.
        @param include_params: include parameter values of each model in the result (True, default).
            if you're just interested in the R-factor, set this to False and parameter values are not retrieved.
        @return: None
        """
        if jobs == -1:
            jobs = db_query.query_newest_job(session)

        # work on a copy so that the self.filters attribute is not modified
        filters = list(self.filters) if self.filters is not None else []
        if self.generations is not None:
            filters.append(db_orm.Model.gen.in_(self.generations))
        if self.particles is not None:
            filters.append(db_orm.Model.particle.in_(self.particles))
        if self.levels is not None:
            for k, v in self.levels.items():
                if k[0] == '_':
                    k = k[1:]
                if hasattr(type(v), '__iter__'):
                    filters.append(getattr(db_orm.Result, k).in_(v))
                else:
                    filters.append(getattr(db_orm.Result, k) == v)

        if self.order is None:
            self.order = [db_orm.Model.gen, db_orm.Model.particle,
                          db_orm.Result.scan, db_orm.Result.domain, db_orm.Result.emit]

        hook_data = {'filters': filters}
        self.values, self.deltas = db_query.query_model_results_array(session,
                                                                      jobs=jobs,
                                                                      query_hook=self._filters_hook,
                                                                      hook_data=hook_data,
                                                                      include_params=include_params,
                                                                      order=self.order)

        self.update_collections()
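
    # Example (a hedged sketch, assuming `session` was obtained from
    # pmsco.database.access.DatabaseAccess.session() as described above):
    #
    #   rd = ResultData()
    #   rd.generations = [10, 11, 12]              # SQL-level filter on Model.gen
    #   rd.levels = {'scan': [0]}                  # SQL-level filter on Result.scan
    #   rd.filters = [db_orm.Model.particle < 20]  # extra sqlalchemy expression
    #   rd.load_from_db(session, jobs=-1, include_params=True)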

    @staticmethod
    def _filters_hook(query, filters):
        """
        hook function used in ResultData.load_from_db

        the function adds a sequence of conditions to a database query.

        @param query: sqlalchemy query object
        @param filters: sequence of filter expressions to be passed to query.filter.
            example: db_orm.Model.gen.in_([1, 2, 3])
        @return: modified query
        """
        for f in filters:
            query = query.filter(f)
        return query

    def load_from_project(self, project):
        """
        define the model space from a project object.

        @note this copies the reference. the referenced object must not be modified!

        @param project: project object that provides a model_space attribute (cf. pmsco.project).
        @return: None
        """
        self.model_space = project.model_space

    def reset_filters(self):
        """
        reset all filter attributes to default values

        this function resets all instance attributes that modify the query statement to their default values.
        in particular: generations, params, particles, levels, filters and order.
        it does not affect the values and deltas arrays.

        @return: None
        """
        self.generations = None
        self.params = None
        self.particles = None
        self.levels = None
        self.filters = None
        self.order = None

    def apply_filters(self):
        """
        apply generation, particle and level filters to the loaded arrays.

        this method acts on the loaded values and deltas arrays
        and replaces them with arrays containing only the selected rows.

        @return: None
        """
        filters = {}
        if self.generations is not None:
            filters['_gen'] = list(self.generations)
        if self.particles is not None:
            filters['_particle'] = list(self.particles)
        if self.levels is not None:
            for k, v in self.levels.items():
                if k[0] != '_':
                    k = '_' + k
                if k in self.values.dtype.names:
                    filters[k] = list(v)

        for k, v in filters.items():
            idx = np.where(np.isin(self.values[k], v))
            self.values = self.values[idx]
            if self.deltas is not None:
                idx = np.where(np.isin(self.deltas[k], v))
                self.deltas = self.deltas[idx]
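
    # Example (illustrative): narrowing already loaded arrays down to one scan,
    # assuming the arrays carry a '_scan' column.
    #
    #   rd.levels = {'scan': [0]}    # keys may be given with or without the leading underscore
    #   rd.apply_filters()
    #   rd.update_collections()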

    def update_collections(self):
        """
        update attributes that depend on the values and deltas arrays.

        namely: params, generations, particles, model_space

        this method is called by the load methods after loading data.

        @return: None
        """
        self.params = db_util.regular_params(self.values.dtype.names)

        try:
            self.generations = np.unique(self.values['_gen'])
        except (KeyError, ValueError):
            pass

        try:
            self.particles = np.unique(self.values['_particle'])
        except (KeyError, ValueError):
            pass

        self.model_space = ModelSpace()
        self.model_space.min, self.model_space.max = array_range(self.values)

    def debug_log(self):
        """
        write the main attributes to the logger at debug level.
        """
        logger.debug(f"params = {self.params}")
        logger.debug(f"generations = {self.generations}")
        logger.debug(f"particles = {self.particles}")
        logger.debug(f"levels = {self.levels}")
        logger.debug(f"model_space.min = {self.model_space.min}")
        logger.debug(f"model_space.max = {self.model_space.max}")
        logger.debug(f"values.shape = {self.values.shape}")
        logger.debug(f"values.dtype = {self.values.dtype}")

    def set_model_space(self, model_space):
        """
        set the model space (parameter value range)

        @note the model space is updated by the load methods and update_collections().

        @param model_space: model space can be a pmsco.project.ModelSpace object,
            any object that contains the same min and max attributes as pmsco.project.ModelSpace,
            or a dictionary with two keys 'min' and 'max' that provides the corresponding ModelSpace dictionaries.
        @return: None
        """
        if isinstance(model_space, ModelSpace):
            self.model_space = model_space
        else:
            self.model_space = ModelSpace()
            try:
                self.model_space.min, self.model_space.max = model_space.min, model_space.max
            except AttributeError:
                self.model_space.min, self.model_space.max = model_space['min'], model_space['max']

    def non_degenerate_params(self):
        """
        get the names of non-degenerate parameters

        the result set contains the names of all parameters
        where the upper range limit of the model space is strictly greater than the lower.

        the result is based on the params and model_space attributes.

        @return: set of strings (not ordered).
        """
        pn = set(self.params)
        rmn = set(self.model_space.min.keys())
        rmx = set(self.model_space.max.keys())
        names = pn.intersection(rmn).intersection(rmx)
        names = {name for name in names if self.model_space.max[name] > self.model_space.min[name]}
        return names

    def iterate_generations(self):
        """
        iterate over generations.

        this is a generator function.
        it yields a @ref ResultData object for each generation,
        where the data is filtered by generation.

        the @ref ResultData object is a shallow copy of `self`.
        the attributes are references to the original objects.
        the `values` and `deltas` are sub-arrays extracted from the original arrays,
        containing just the elements belonging to one generation.
        `generations` contains just one element: the generation number.

        @return: one @ref ResultData for each generation
        """
        for gen in self.generations:
            rd = ResultData()
            rd.generations = (gen,)
            rd.params = self.params
            rd.particles = self.particles
            rd.levels = self.levels
            idx = np.where(self.values['_gen'] == gen)
            rd.values = self.values[idx]
            if self.deltas is not None:
                idx = np.where(self.deltas['_gen'] == gen)
                rd.deltas = self.deltas[idx]
            rd.model_space = self.model_space
            yield rd
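
    # Example (a hedged sketch): per-generation statistics over loaded results.
    #
    #   for gen_data in rd.iterate_generations():
    #       gen = gen_data.generations[0]
    #       best = gen_data.values['_rfac'].min()
    #       logger.info(f"generation {gen}: best R-factor {best}")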