public release 2.2.0 - see README.md and CHANGES.md for details

This commit is contained in:
2020-09-04 16:22:42 +02:00
parent fbd2d4fa8c
commit 7c61eb1b41
67 changed files with 2934 additions and 682 deletions

View File

@ -29,6 +29,7 @@ import sqlite3
import fasteners
import numpy as np
import pmsco.dispatch as dispatch
from pmsco.helpers import BraceMessage as BMsg
logger = logging.getLogger(__name__)
@ -60,7 +61,7 @@ DB_SPECIAL_PARAMS = {"job_id": "_db_job",
"result_id": "_db_result",
"model": "_model",
"scan": "_scan",
"sym": "_sym",
"domain": "_domain",
"emit": "_emit",
"region": "_region",
"gen": "_gen",
@ -77,7 +78,7 @@ DB_SPECIAL_NUMPY_TYPES = {"job_id": "i8",
"result_id": "i8",
"model": "i8",
"scan": "i8",
"sym": "i8",
"domain": "i8",
"emit": "i8",
"region": "i8",
"gen": "i8",
@ -259,7 +260,7 @@ class ResultsDatabase(object):
sql_select_model = """select id, job_id, model, gen, particle
from Models where id=:id"""
sql_select_model_model = """select id, job_id, model, gen, particle
from Models where model=:model"""
from Models where job_id=:job_id and model=:model"""
sql_select_model_job = """select id, job_id, model, gen, particle
from Models where job_id=:job_id"""
sql_delete_model = """delete from Models where model_id = :model_id"""
@ -268,7 +269,7 @@ class ResultsDatabase(object):
`id` INTEGER PRIMARY KEY,
`model_id` INTEGER,
`scan` integer,
`sym` integer,
`domain` integer,
`emit` integer,
`region` integer,
`rfac` REAL,
@ -276,22 +277,29 @@ class ResultsDatabase(object):
)"""
sql_index_results_tasks = """create index if not exists
`index_results_tasks` ON `Results`
(`model_id`, `scan`,`sym`,`emit`,`region`)"""
(`model_id`, `scan`,`domain`,`emit`,`region`)"""
sql_drop_index_results_tasks = "drop index if exists index_results_tasks"
sql_index_results_models = """create index if not exists
`index_results_models` ON `Results`
(`id`, `model_id`)"""
sql_drop_index_results_models = "drop index if exists index_results_models"
sql_insert_result = """insert into Results(model_id, scan, sym, emit, region, rfac)
values (:model_id, :scan, :sym, :emit, :region, :rfac)"""
sql_insert_result = """insert into Results(model_id, scan, domain, emit, region, rfac)
values (:model_id, :scan, :domain, :emit, :region, :rfac)"""
sql_update_result = """update Results
set rfac=:rfac
where id=:result_id"""
sql_select_result = """select id, model_id, scan, sym, emit, region, rfac
sql_select_result = """select id, model_id, scan, domain, emit, region, rfac
from Results where id=:id"""
sql_select_result_index = """select id, model_id, scan, sym, emit, region, rfac
from Results where model_id=:model_id and scan=:scan and sym=:sym and emit=:emit and region=:region"""
sql_select_result_index = """select id, model_id, scan, domain, emit, region, rfac
from Results where model_id=:model_id and scan=:scan and domain=:domain and emit=:emit and region=:region"""
sql_delete_result = """delete from Results where id = :result_id"""
sql_view_results_models = """create view if not exists `ViewResultsModels` as
select project_id, job_id, model_id, Results.id as result_id, rfac, model, scan, domain, emit, region
from Models
join Results on Results.model_id = Models.id
join Jobs on Jobs.id = Models.job_id
order by project_id, job_id, rfac, model, scan, domain, emit, region
"""
sql_create_params = """CREATE TABLE IF NOT EXISTS `Params` (
`id` INTEGER PRIMARY KEY,
@ -422,16 +430,6 @@ class ResultsDatabase(object):
# @var _lock_filename (str).
# path and name of the lock file or an empty string if no locking is used.
# @var _lock (obj).
# context manager which provides a locking mechanism for the database.
#
# this is either a fasteners.InterprocessLock or _DummyLock.
# InterprocessLock allows to serialize access to the database by means of a lock file.
# _DummyLock is used with an in-memory database which does not require locking.
#
# @note InterprocessLock is re-usable but not re-entrant.
# Be careful not to nest contexts when calling other methods from within this class!
def __init__(self):
self._conn = None
self._db_filename = ""
@ -440,7 +438,6 @@ class ResultsDatabase(object):
self._model_params = {}
self._tags = {}
self._lock_filename = ""
self._lock = None
def connect(self, db_filename, lock_filename=""):
"""
@ -469,14 +466,10 @@ class ResultsDatabase(object):
self._lock_filename = ""
else:
self._lock_filename = db_filename + ".lock"
if self._lock_filename:
self._lock = fasteners.InterProcessLock(self._lock_filename)
else:
self._lock = _DummyLock()
self._conn = sqlite3.connect(self._db_filename)
self._conn.row_factory = sqlite3.Row
with self._lock:
with self.lock():
self._conn.execute("PRAGMA foreign_keys = 1")
self._conn.commit()
c = self._conn.execute("SELECT name FROM sqlite_master WHERE type='table' AND name='Models'")
@ -496,7 +489,6 @@ class ResultsDatabase(object):
if self._conn is not None:
self._conn.close()
self._conn = None
self._lock = None
def check_connection(self):
"""
@ -511,9 +503,25 @@ class ResultsDatabase(object):
@raise AssertionError if the connection is not valid.
"""
assert self._lock is not None, "database not connected"
assert self._conn is not None, "database not connected"
def lock(self):
    """
    return a context manager that serializes access to the database.

    a new lock object is created on every call; nothing is cached on the instance.
    the returned object is meant to be used in a with statement,
    typically together with the connection: `with self.lock(), self._conn:`.

    if self._lock_filename is empty, no file locking is used
    and a _DummyLock object is returned.
    according to the original notes, this is the case for an in-memory database
    which does not require locking.

    otherwise, a fasteners.InterProcessLock on self._lock_filename is returned.
    the lock file allows multiple pmsco instances to share the same database file.

    @return: fasteners.InterProcessLock or _DummyLock object.
    """
    # guard clause: no lock file configured, hand out the no-op lock.
    if not self._lock_filename:
        return _DummyLock()

    return fasteners.InterProcessLock(self._lock_filename)
def create_schema(self):
"""
create the database schema (tables and indices).
@ -525,7 +533,7 @@ class ResultsDatabase(object):
@return: None
"""
self.check_connection()
with self._lock, self._conn:
with self.lock(), self._conn:
self._conn.execute(self.sql_create_projects)
self._conn.execute(self.sql_create_jobs)
self._conn.execute(self.sql_create_models)
@ -539,19 +547,23 @@ class ResultsDatabase(object):
self._conn.execute(self.sql_index_paramvalues)
self._conn.execute(self.sql_index_jobtags)
self._conn.execute(self.sql_index_models)
self._conn.execute(self.sql_view_results_models)
def register_project(self, name, code):
"""
register a project with the database.
@param name: name of the project. alphanumeric characters only. no spaces or special characters!
if a project of the same name exists in the database,
the id of the existing entry is returned.
the existing entry is not modified.
@param code: name of the pmsco module that defines the project.
@return: id value of the project in the database.
"""
self.check_connection()
with self._lock, self._conn:
with self.lock(), self._conn:
c = self._conn.execute(self.sql_select_project_name, {'name': name})
v = c.fetchone()
if v:
@ -574,7 +586,7 @@ class ResultsDatabase(object):
@return None
"""
self.check_connection()
with self._lock, self._conn:
with self.lock(), self._conn:
param_dict = {'project_id': project_id}
self._conn.execute(self.sql_delete_project, param_dict)
@ -585,7 +597,11 @@ class ResultsDatabase(object):
@param project_id: identifier of the project. see register_project().
@param name: name of the job. up to the user, must be unique within a project.
@param name: name of the job. alphanumeric characters only. no spaces or special characters!
must be unique within a project.
if a job of the same name and same project exists in the database,
the id of the existing entry is returned.
the existing entry is not modified.
@param mode: optimization mode string (should be same as command line argument).
@ -600,7 +616,7 @@ class ResultsDatabase(object):
@return: id value of the job in the database.
"""
self.check_connection()
with self._lock, self._conn:
with self.lock(), self._conn:
c = self._conn.execute(self.sql_select_job_name, {'project_id': project_id, 'name': name})
v = c.fetchone()
if v:
@ -630,7 +646,7 @@ class ResultsDatabase(object):
@return None
"""
self.check_connection()
with self._lock, self._conn:
with self.lock(), self._conn:
param_dict = {'job_id': job_id}
self._conn.execute(self.sql_delete_job, param_dict)
@ -669,7 +685,7 @@ class ResultsDatabase(object):
@return: id value of the job in the database
"""
self.check_connection()
with self._lock, self._conn:
with self.lock(), self._conn:
job_id = self._query_job_name(job_name, project_id=project_id)
return job_id
@ -686,7 +702,7 @@ class ResultsDatabase(object):
@return: id value of the parameter in the database.
"""
self.check_connection()
with self._lock, self._conn:
with self.lock(), self._conn:
return self._register_param(key)
def _register_param(self, key):
@ -721,7 +737,7 @@ class ResultsDatabase(object):
@return: None
"""
self.check_connection()
with self._lock, self._conn:
with self.lock(), self._conn:
for key in model_params:
if key[0] != '_':
self._register_param(key)
@ -762,7 +778,7 @@ class ResultsDatabase(object):
params = {}
self.check_connection()
with self._lock, self._conn:
with self.lock(), self._conn:
c = self._conn.execute(sql, args)
for row in c:
params[row['key']] = row['param_id']
@ -790,7 +806,7 @@ class ResultsDatabase(object):
@return: id value of the tag in the database.
"""
self.check_connection()
with self._lock, self._conn:
with self.lock(), self._conn:
return self._register_tag(key)
def _register_tag(self, key):
@ -825,7 +841,7 @@ class ResultsDatabase(object):
@return: None
"""
self.check_connection()
with self._lock, self._conn:
with self.lock(), self._conn:
for key in tags:
self._register_tag(key)
@ -865,7 +881,7 @@ class ResultsDatabase(object):
tags = {}
self.check_connection()
with self._lock, self._conn:
with self.lock(), self._conn:
c = self._conn.execute(sql, args)
for row in c:
tags[row['key']] = row['tag_id']
@ -889,7 +905,7 @@ class ResultsDatabase(object):
tags = {}
self.check_connection()
with self._lock, self._conn:
with self.lock(), self._conn:
c = self._conn.execute(sql, args)
for row in c:
tags[row['key']] = row['value']
@ -912,7 +928,7 @@ class ResultsDatabase(object):
@return: None
"""
self.check_connection()
with self._lock, self._conn:
with self.lock(), self._conn:
for key, value in tags.items():
try:
tag_id = self._tags[key]
@ -965,7 +981,7 @@ class ResultsDatabase(object):
params = self.query_project_params(project_id, job_id)
params.update(self._model_params)
param_names = sorted(params, key=lambda s: s.lower())
with self._lock, self._conn:
with self.lock(), self._conn:
if job_id:
view_name = "ViewModelsJob{0}".format(job_id)
else:
@ -1009,7 +1025,7 @@ class ResultsDatabase(object):
@raise KeyError if a parameter hasn't been registered.
"""
self.check_connection()
with self._lock, self._conn:
with self.lock(), self._conn:
# insert model record
model_dict = {'job_id': self.job_id, 'gen': None, 'particle': None}
model_dict.update(special_params(model_params))
@ -1036,7 +1052,7 @@ class ResultsDatabase(object):
@return None
"""
self.check_connection()
with self._lock, self._conn:
with self.lock(), self._conn:
param_dict = {'model_id': model_id}
self._conn.execute(self.sql_delete_model, param_dict)
@ -1048,7 +1064,7 @@ class ResultsDatabase(object):
@return: dict
"""
self.check_connection()
with self._lock, self._conn:
with self.lock(), self._conn:
c = self._conn.execute(self.sql_select_paramvalue_model, {'model_id': model_id})
d = {}
for row in c:
@ -1084,7 +1100,7 @@ class ResultsDatabase(object):
@param filter: list of filter expressions.
each expression is a relational expression of the form <code>field operator value</code>,
where field is a unique field name of the Projects, Jobs, Models or Results table, e.g.
`job_id`, `model`, `rfac`, `scan`, `sym`, etc.
`job_id`, `model`, `rfac`, `scan`, `domain`, etc.
operator is one of the relational operators in SQL syntax.
value is a numeric or string constant, the latter including single or double quotes.
if the list is empty, no filtering is applied.
@ -1102,7 +1118,7 @@ class ResultsDatabase(object):
"""
self.check_connection()
filter += [" project_id = {0} ".format(self.project_id)]
with self._lock, self._conn:
with self.lock(), self._conn:
sql = "select distinct Models.id as model_id, model "
sql += "from Models "
sql += "join Results on Models.id = Results.model_id "
@ -1147,7 +1163,7 @@ class ResultsDatabase(object):
@param filter: list of filter expressions.
each expression is a relational expression of the form <code>field operator value</code>,
where field is a unique field name of the Projects, Jobs, Models or Results table, e.g.
`job_id`, `model`, `rfac`, `scan`, `sym`, etc.
`job_id`, `model`, `rfac`, `scan`, `domain`, etc.
operator is one of the relational operators in SQL syntax.
value is a numeric or string constant, the latter including single or double quotes.
if the list is empty, no filtering is applied.
@ -1161,9 +1177,9 @@ class ResultsDatabase(object):
"""
self.check_connection()
filter += [" project_id = {0} ".format(self.project_id)]
with self._lock, self._conn:
with self.lock(), self._conn:
sql = "select Results.id as result_id, model_id, job_id, "
sql += "model, scan, sym, emit, region, rfac, gen, particle "
sql += "model, scan, domain, emit, region, rfac, gen, particle "
sql += "from Models "
sql += "join Results on Models.id = Results.model_id "
sql += "join Jobs on Models.job_id = Jobs.id "
@ -1172,7 +1188,7 @@ class ResultsDatabase(object):
sql += "where "
sql += " and ".join(filter)
sql += " "
sql += "order by rfac, job_id, model, scan, sym, emit, region "
sql += "order by rfac, job_id, model, scan, domain, emit, region "
if limit:
sql += "limit {0} ".format(limit)
c = self._conn.execute(sql)
@ -1240,7 +1256,7 @@ class ResultsDatabase(object):
level_name = dispatch.CALC_LEVELS[4]
self.check_connection()
with self._lock, self._conn:
with self.lock(), self._conn:
sql = "select Models.id from Models "
sql += "join Results on Models.id = Results.model_id "
sql += "join Jobs on Models.job_id = Jobs.id "
@ -1250,7 +1266,7 @@ class ResultsDatabase(object):
sql += "and Models.job_id in ({0}) ".format(",".join(map(str, job_ids)))
sql += "group by Models.job_id "
sql += "having min(rfac) "
sql += "order by rfac, job_id, model, scan, sym, emit, region "
sql += "order by rfac, job_id, model, scan, domain, emit, region "
c = self._conn.execute(sql)
models = [row['id'] for row in c]
@ -1261,7 +1277,7 @@ class ResultsDatabase(object):
query the task index used in a calculation job.
this query neglects the model index
and returns the unique tuples (-1, scan, sym, emit, region).
and returns the unique tuples (-1, scan, domain, emit, region).
@param job_id: (int) id of the associated Jobs entry.
if 0, self.job_id is used.
@ -1273,8 +1289,8 @@ class ResultsDatabase(object):
job_id = self.job_id
self.check_connection()
with self._lock, self._conn:
sql = "select scan, sym, emit, region "
with self.lock(), self._conn:
sql = "select scan, domain, emit, region "
sql += "from Models "
sql += "join Results on Models.id = Results.model_id "
sql += "join Jobs on Models.job_id = Jobs.id "
@ -1323,7 +1339,7 @@ class ResultsDatabase(object):
sql += "join Results on Models.id = Results.model_id "
sql += "where Models.job_id = :job_id "
sql += "and scan = :scan "
sql += "and sym = :sym "
sql += "and domain = :domain "
sql += "and emit = :emit "
sql += "and region = :region "
sql += "order by rfac "
@ -1334,7 +1350,7 @@ class ResultsDatabase(object):
tasks = self.query_tasks(job_id)
models = set([])
with self._lock, self._conn:
with self.lock(), self._conn:
for task in tasks:
if task.numeric_level <= level:
d = task._asdict()
@ -1360,7 +1376,7 @@ class ResultsDatabase(object):
@param index: (pmsco.dispatch.CalcID or dict)
calculation index.
in case of dict, the keys must be the attribute names of CalcID prefixed with an underscore, i.e.,
'_model', '_scan', '_sym', '_emit', '_region'.
'_model', '_scan', '_domain', '_emit', '_region'.
extra values in the dictionary are ignored.
undefined indices must be -1.
@ -1377,11 +1393,13 @@ class ResultsDatabase(object):
job_id = self.job_id
self.check_connection()
with self._lock, self._conn:
with self.lock(), self._conn:
model_id = self._insert_result_model(job_id, index, result)
result_id = self._insert_result_data(model_id, index, result)
self._insert_result_paramvalues(model_id, result)
logger.debug(BMsg("database insert result: job {}, model {}, result {}", job_id, model_id, result_id))
return result_id
def _insert_result_model(self, job_id, index, result):
@ -1402,7 +1420,7 @@ class ResultsDatabase(object):
@param index: (pmsco.dispatch.CalcID or dict)
calculation index.
in case of dict, the keys must be the attribute names of CalcID prefixed with an underscore, i.e.,
'_model', '_scan', '_sym', '_emit', '_region'.
'_model', '_scan', '_domain', '_emit', '_region'.
extra values in the dictionary are ignored.
undefined indices must be -1.
@ -1448,7 +1466,7 @@ class ResultsDatabase(object):
@param index: (pmsco.dispatch.CalcID or dict)
calculation index.
in case of dict, the keys must be the attribute names of CalcID prefixed with an underscore, i.e.,
'_model', '_scan', '_sym', '_emit', '_region'.
'_model', '_scan', '_domain', '_emit', '_region'.
extra values in the dictionary are ignored.
undefined indices must be -1.
@param result: (dict) dictionary containing the parameter values and the '_rfac' result.
@ -1525,7 +1543,7 @@ class ResultsDatabase(object):
if not job_id:
job_id = self.job_id
data = np.genfromtxt(filename, names=True)
data = np.atleast_1d(np.genfromtxt(filename, names=True))
self.register_params(data.dtype.names)
try:
unique_models, unique_index = np.unique(data['_model'], True)
@ -1552,7 +1570,7 @@ class ResultsDatabase(object):
model = unique_models[0]
result_entry = {'model_id': model_ids[model],
'scan': -1,
'sym': -1,
'domain': -1,
'emit': -1,
'region': -1,
'rfac': None}
@ -1571,7 +1589,7 @@ class ResultsDatabase(object):
'value': value}
yield param_entry
with self._lock, self._conn:
with self.lock(), self._conn:
c = self._conn.execute(self.sql_select_model_job, {'job_id': job_id})
v = c.fetchone()
if v: