public release 4.2.0 - see README.md and CHANGES.md for details
This commit is contained in:
158
pmsco/database/project.py
Normal file
158
pmsco/database/project.py
Normal file
@@ -0,0 +1,158 @@
|
||||
"""
|
||||
@package pmsco.database.project
|
||||
wrapper class for project-specific database operations
|
||||
|
||||
|
||||
usage:
|
||||
~~~~~~{.py}
|
||||
db = DatabaseAccess()
|
||||
db.connect("file.db")
|
||||
with db.session() as session:
|
||||
# database access here
|
||||
# ...
|
||||
# commit transaction
|
||||
session.commit()
|
||||
# continue in new transaction
|
||||
# ...
|
||||
|
||||
# at the end of the context
|
||||
# the session is closed and orm objects are detached from the database.
|
||||
~~~~~~
|
||||
|
||||
@author Matthias Muntwiler, matthias.muntwiler@psi.ch
|
||||
|
||||
@copyright (c) 2016-21 by Paul Scherrer Institut @n
|
||||
Licensed under the Apache License, Version 2.0 (the "License"); @n
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
"""
|
||||
|
||||
import datetime
|
||||
import logging
|
||||
import socket
|
||||
from pmsco.database.access import DatabaseAccess
|
||||
import pmsco.database.common as db_common
|
||||
import pmsco.database.ingest as db_ingest
|
||||
import pmsco.database.query as db_query
|
||||
from pmsco.dispatch import mpi_size
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class ProjectDatabase(DatabaseAccess):
    """
    wrapper class for project specific database operations

    the purpose of this class is to bundle all specific code and run-time information
    for database access of a running calculation job.

    after calling ingest_project_metadata(),
    the class object stores the persistent project and job identifiers
    in the db_project_id and db_job_id attributes.
    the other methods provide convenient wrappers so that database code can be kept minimal in the project.

    usage:
    ~~~~~~{.py}
    db = ProjectDatabase()
    db.connect('file.db')
    db.ingest_project_metadata(project)
    for index, result, delta in results:
        db.ingest_result(index, result, delta)
    ~~~~~~
    """

    def __init__(self):
        super().__init__()
        # database keys of the project and job records.
        # both remain None until ingest_project_metadata() has been called.
        self.db_project_id = None
        self.db_job_id = None

    def ingest_project_metadata(self, project):
        """
        ingest project metadata into the database

        registers the project, a new job, the job tags and the model parameters
        in one transaction and stores the resulting project and job keys
        in self.db_project_id and self.db_job_id.

        the following attributes of the project are read:
        project_name, job_name, mode, git_hash, timedelta_limit, description,
        job_tags and model_space.start.
        the host name, the current (local) time and the number of MPI processes
        are added to the job record.

        @param project: pmsco.project.Project object

        @return: None
        """
        with self.session() as session:
            db_project = db_common.register_project(session=session,
                                                    name=project.project_name,
                                                    code=project.__module__,
                                                    allow_existing=True)

            db_job = db_common.register_job(session=session,
                                            project=db_project,
                                            job_name=project.job_name,
                                            allow_existing=False,
                                            mode=project.mode,
                                            machine=socket.gethostname(),
                                            git_hash=project.git_hash,
                                            datetime=datetime.datetime.now(),
                                            processes=mpi_size,
                                            # the time limit is passed in hours
                                            hours=project.timedelta_limit.total_seconds() / 3600.,
                                            description=project.description)

            db_common.register_job_tags(session, db_job, project.job_tags)
            db_common.register_params(session, project.model_space.start.keys())
            session.commit()

            # read the keys while the session is still open:
            # orm objects are detached from the database at the end of the context.
            self.db_project_id = db_project.id
            self.db_job_id = db_job.id

    def ingest_result(self, index, result, delta=None):
        """
        add or update a result in the database.

        the method updates the Models, Results and ParamValues tables.

        the model is identified by self.db_job_id and index.model.
        the result is identified by self.db_job_id and index.
        if the model or result exists in the database, it is updated.

        ingest_project_metadata() must have been called before this method
        so that the project and job identifiers are known.

        @param index: (pmsco.dispatch.CalcID or dict)
            calculation index.
            in case of dict, the keys must be the attribute names of CalcID prefixed with an underscore, i.e.,
            '_model', '_scan', '_domain', '_emit', '_region'.
            extra values in the dictionary are ignored.
            undefined indices must be -1.

        @param result: (dict) dictionary containing the parameter values and the '_rfac' result.
            may also contain the special values '_gen', '_particle', '_timestamp'.
            '_gen' and '_particle' are integers and default to None.
            '_timestamp' can be numeric (seconds since jan 1, 1970)
            or an object that implements a timestamp function like datetime.datetime.
            it defaults to the current (local) time.

        @param delta: (dict) dictionary containing the delta values.
            the keys must correspond to model keys in the result dictionary.
            this argument is optional and defaults to None.
            None is passed on unchanged to pmsco.database.ingest.store_param_values
            (NOTE(review): confirm that store_param_values accepts None).

        @return: None

        @raise AssertionError if the project or job identifier is undefined.
        """
        assert self.db_project_id is not None, \
            "undefined project id: call ingest_project_metadata() first"
        assert self.db_job_id is not None, \
            "undefined job id: call ingest_project_metadata() first"

        with self.session() as session:
            job_obj = db_common.get_job(session, self.db_project_id, self.db_job_id)
            model_obj = db_ingest.store_model(session, job_obj, index, result)
            db_ingest.store_result_data(session, model_obj, index, result)
            db_ingest.store_param_values(session, model_obj, result, delta)
            session.commit()

    def query_best_task_models(self, level, count):
        """
        query N best models per task.

        this is a wrapper for pmsco.database.query.query_best_task_models().
        in addition to the wrapped function, it opens a session and uses the registered db_job_id.

        this query is used by the file tracker to determine the models to keep.

        @param level: level up to which to query.
            the level can be specified by level name (str) or numeric index (0..4).
            if it is scan (equivalent to 1), the method queries the model and scan levels.

        @param count: number of models to query per task.

        @return set of matching model numbers (model index, Models.model field).
        """
        with self.session() as session:
            models = db_query.query_best_task_models(session, self.db_job_id, level, count)

        return models
|
||||
Reference in New Issue
Block a user