159 lines
6.0 KiB
Python
159 lines
6.0 KiB
Python
"""
|
|
@package pmsco.database.project
|
|
wrapper class for project-specific database operations
|
|
|
|
|
|
usage:
|
|
~~~~~~{.py}
|
|
db = DatabaseAccess()
|
|
db.connect("file.db")
|
|
with db.session() as session:
|
|
# database access here
|
|
# ...
|
|
# commit transaction
|
|
session.commit()
|
|
# continue in new transaction
|
|
# ...
|
|
|
|
# at the end of the context
|
|
# the session is closed and orm objects are detached from the database.
|
|
~~~~~~
|
|
|
|
@author Matthias Muntwiler, matthias.muntwiler@psi.ch
|
|
|
|
@copyright (c) 2016-21 by Paul Scherrer Institut @n
|
|
Licensed under the Apache License, Version 2.0 (the "License"); @n
|
|
you may not use this file except in compliance with the License.
|
|
You may obtain a copy of the License at
|
|
http://www.apache.org/licenses/LICENSE-2.0
|
|
"""
|
|
|
|
import datetime
|
|
import logging
|
|
import socket
|
|
from pmsco.database.access import DatabaseAccess
|
|
import pmsco.database.common as db_common
|
|
import pmsco.database.ingest as db_ingest
|
|
import pmsco.database.query as db_query
|
|
from pmsco.dispatch import mpi_size
|
|
|
|
# module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
|
|
|
|
|
|
class ProjectDatabase(DatabaseAccess):
    """
    wrapper class for project specific database operations

    the purpose of this class is to bundle all specific code and run-time information
    for database access of a running calculation job.

    after calling ingest_project_metadata(),
    the class object stores the persistent project and job identifiers
    in the db_project_id and db_job_id attributes.
    the other methods provide convenient wrappers so that database code can be kept minimal in the project.

    usage:
    ~~~~~~{.py}
    db = ProjectDatabase()
    db.connect('file.db')
    db.ingest_project_metadata(project)
    for index, result, delta in results:
        db.ingest_result(index, result, delta)
    ~~~~~~
    """

    def __init__(self):
        """
        initialize the object with undefined project and job identifiers.

        db_project_id and db_job_id are None until ingest_project_metadata() sets them.
        """
        super().__init__()
        self.db_project_id = None
        self.db_job_id = None

    def ingest_project_metadata(self, project):
        """
        ingest project metadata into the database

        the method registers the project, the job, the job tags and the parameter keys
        in one session and commits the transaction.
        the project is registered with allow_existing=True, the job with allow_existing=False.
        the identifiers of the project and job records are stored
        in self.db_project_id and self.db_job_id.

        the following attributes of the project are read:
        project_name, job_name, mode, git_hash, timedelta_limit, description, job_tags,
        model_space.start (its keys are registered as parameters)
        and the name of the module that defines the project class.

        the job record further receives the host name of the local machine, the current local time
        and mpi_size imported from pmsco.dispatch.

        @param project: pmsco.project.Project object

        @return: None
        """
        with self.session() as session:
            db_project = db_common.register_project(session=session,
                                                    name=project.project_name,
                                                    code=project.__module__,
                                                    allow_existing=True)

            db_job = db_common.register_job(session=session,
                                            project=db_project,
                                            job_name=project.job_name,
                                            allow_existing=False,
                                            mode=project.mode,
                                            machine=socket.gethostname(),
                                            git_hash=project.git_hash,
                                            datetime=datetime.datetime.now(),
                                            processes=mpi_size,
                                            # time limit converted from seconds to hours
                                            hours=project.timedelta_limit.total_seconds() / 3600.,
                                            description=project.description)

            db_common.register_job_tags(session, db_job, project.job_tags)
            db_common.register_params(session, project.model_space.start.keys())
            session.commit()

            # read the ids while the session is still open.
            # the module documentation states that orm objects are detached
            # from the database at the end of the session context.
            self.db_project_id = db_project.id
            self.db_job_id = db_job.id

    def ingest_result(self, index, result, delta=None):
        """
        add or update a result in the database.

        the method updates the Models, Results and ParamValues tables.

        the model is identified by self.db_job_id and index.model.
        the result is identified by self.db_job_id and index.
        if the model or result exists in the database, it is updated.

        ingest_project_metadata() must be called before this method
        so that self.db_project_id and self.db_job_id are defined.

        @param index: (pmsco.dispatch.CalcID or dict)
            calculation index.
            in case of dict, the keys must be the attribute names of CalcID prefixed with an underscore, i.e.,
            '_model', '_scan', '_domain', '_emit', '_region'.
            extra values in the dictionary are ignored.
            undefined indices must be -1.

        @param result: (dict) dictionary containing the parameter values and the '_rfac' result.
            may also contain the special values '_gen', '_particle', '_timestamp'.
            '_gen' and '_particle' are integers and default to None.
            '_timestamp' can be numeric (seconds since jan 1, 1970)
            or an object that implements a timestamp function like datetime.datetime.
            it defaults to the current (local) time.

        @param delta: (dict) dictionary containing the delta values.
            the keys must correspond to model keys in the result dictionary.
            this argument is optional. if omitted, None is passed on to
            pmsco.database.ingest.store_param_values().

        @return: None

        @raise AssertionError if the project or job identifier is undefined.
        """
        # explicit check instead of an assert statement: assert statements are removed
        # when python runs with -O. the exception type is kept for compatibility.
        if self.db_project_id is None or self.db_job_id is None:
            raise AssertionError("ingest_project_metadata() must be called before ingest_result()")

        with self.session() as session:
            job_obj = db_common.get_job(session, self.db_project_id, self.db_job_id)
            model_obj = db_ingest.store_model(session, job_obj, index, result)
            db_ingest.store_result_data(session, model_obj, index, result)
            db_ingest.store_param_values(session, model_obj, result, delta)
            session.commit()

    def query_best_task_models(self, level, count):
        """
        query N best models per task.

        this is a wrapper for pmsco.database.query.query_best_task_models().
        in addition to the wrapped function, it opens a session and uses the registered db_job_id.
        db_job_id is passed on as is. it is None until ingest_project_metadata() has been called.

        this query is used by the file tracker to determine the models to keep.

        @param level: level up to which to query.
            the level can be specified by level name (str) or numeric index (0..4).
            if it is scan (equivalent to 1), the method queries the model and scan levels.
        @param count: number of models to query per task.

        @return set of matching model numbers (model index, Models.model field).
        """
        with self.session() as session:
            models = db_query.query_best_task_models(session, self.db_job_id, level, count)

        return models
|