public distro 2.1.0
@@ -342,6 +342,53 @@ class ResultsDatabase(object):
        where param_id = :param_id and model_id = :model_id
        """

    sql_create_tags = """CREATE TABLE IF NOT EXISTS `Tags` (
        `id` INTEGER PRIMARY KEY,
        `key` TEXT NOT NULL UNIQUE COLLATE NOCASE
        )"""
    sql_insert_tag = "insert into Tags(key) values (:key)"
    sql_select_tag = "select key from Tags where id=:id"
    sql_select_tag_key = "select id, key from Tags where key=:key"
    sql_select_tag_project = """select distinct key, tag_id from Jobs
        join JobTags on Jobs.id = JobTags.job_id
        join Tags on Tags.id = JobTags.tag_id
        where Jobs.project_id = :project_id
        order by key collate nocase"""
    sql_select_tag_job = """select distinct key, tag_id from JobTags
        join Tags on Tags.id = JobTags.tag_id
        where JobTags.job_id = :job_id
        order by key collate nocase"""

    sql_create_jobtags = """CREATE TABLE IF NOT EXISTS `JobTags` (
        `id` INTEGER PRIMARY KEY,
        `tag_id` INTEGER NOT NULL,
        `job_id` INTEGER NOT NULL,
        `value` TEXT COLLATE NOCASE,
        FOREIGN KEY(tag_id) REFERENCES Tags(id) ON DELETE CASCADE,
        FOREIGN KEY(job_id) REFERENCES Jobs(id) ON DELETE CASCADE
        )"""
    sql_index_jobtags = """create index if not exists
        `index_jobtags` ON `JobTags`
        (`tag_id`, `job_id`)"""
    sql_drop_index_jobtags = "drop index if exists index_jobtags"
    sql_insert_jobtag = """
        insert into JobTags(tag_id, job_id, value)
        values (:tag_id, :job_id, :value)
        """
    sql_update_jobtag = """
        update JobTags set value=:value where id=:jobtag_id
        """
    sql_select_jobtag_job = """
        select key, value from JobTags
        join Tags on JobTags.tag_id = Tags.id
        where job_id = :job_id
        """
    sql_select_jobtag = """
        select JobTags.id as id, key, value from JobTags
        join Tags on JobTags.tag_id = Tags.id
        where tag_id = :tag_id and job_id = :job_id
        """

    # @var _conn (sqlite3.Connection).
    # connection interface to the database.
    #
@@ -391,6 +438,7 @@ class ResultsDatabase(object):
        self.project_id = 0
        self.job_id = 0
        self._model_params = {}
        self._tags = {}
        self._lock_filename = ""
        self._lock = None

@@ -484,9 +532,12 @@ class ResultsDatabase(object):
        self._conn.execute(self.sql_create_results)
        self._conn.execute(self.sql_create_params)
        self._conn.execute(self.sql_create_paramvalues)
        self._conn.execute(self.sql_create_tags)
        self._conn.execute(self.sql_create_jobtags)
        self._conn.execute(self.sql_index_results_tasks)
        self._conn.execute(self.sql_index_results_models)
        self._conn.execute(self.sql_index_paramvalues)
        self._conn.execute(self.sql_index_jobtags)
        self._conn.execute(self.sql_index_models)

    def register_project(self, name, code):
@@ -583,6 +634,46 @@ class ResultsDatabase(object):
        param_dict = {'job_id': job_id}
        self._conn.execute(self.sql_delete_job, param_dict)

    def _query_job_name(self, job_name, project_id=0):
        """
        (internal) query a job by name

        this is the internal analog of @ref query_job_name;
        it expects that the lock has been acquired and the connection is open.

        @param job_name: name of the job

        @param project_id: project identifier.
        by default, the current project self.project_id is used.

        @return: id value of the job in the database

        @raise DatabaseError if the job can't be found.
        """
        if project_id == 0:
            project_id = self.project_id
        param_dict = {'project_id': project_id, 'name': job_name}
        c = self._conn.execute(self.sql_select_job_name, param_dict)
        v = c.fetchone()
        return v[0]

    def query_job_name(self, job_name, project_id=0):
        """
        query a job by name

        @param job_name: name of the job

        @param project_id: project identifier.
        by default, the current project self.project_id is used.

        @return: id value of the job in the database
        """
        self.check_connection()
        with self._lock, self._conn:
            job_id = self._query_job_name(job_name, project_id=project_id)

        return job_id

    def register_param(self, key):
        """
        register a parameter key with the database.
@@ -681,6 +772,165 @@ class ResultsDatabase(object):

        return params

    def register_tag(self, key):
        """
        register a tag with the database.

        tags are a way of structuring a job description.
        they can be used, for instance, to distinguish calculations made with different clusters,
        different experimental data, etc.
        a job tag has a key and a value and is associated with a job.
        the use of tags is up to the user. pmsco does not change or read them.

        each tag name must be registered once before a value can be written to the database.
        see the class description for an explanation.

        @param key: key (name) of the tag.

        @return: id value of the tag in the database.
        """
        self.check_connection()
        with self._lock, self._conn:
            return self._register_tag(key)

    def _register_tag(self, key):
        """
        register a tag with the database without committing the transaction.

        @note this method does not lock the database file and does not commit.
        to lock the database and commit the transaction, call the public method register_tag().

        @param key: key (name) of the tag.

        @return: id value of the tag in the database.
        """
        c = self._conn.execute(self.sql_select_tag_key, {'key': key})
        v = c.fetchone()
        if v:
            tag_id = v[0]
        else:
            c = self._conn.execute(self.sql_insert_tag, {'key': key})
            tag_id = c.lastrowid
        self._tags[key] = tag_id
        return tag_id

    def register_tags(self, tags):
        """
        register the tags of this project with the database.

        each tag name must be registered once before a value can be written to the database.
        see the class description for an explanation.

        @param tags: sequence of tag keys, or dictionary of tags.
        @return: None
        """
        self.check_connection()
        with self._lock, self._conn:
            for key in tags:
                self._register_tag(key)

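    # illustrative usage sketch (not prescribed by this commit): assuming an open
    # ResultsDatabase instance `db` with a registered project, tag keys can be
    # registered before any values are written; the key names below are examples.
    #
    #     db.register_tags(["cluster", "dataset"])      # bulk registration of keys
    #     tag_id = db.register_tag("temperature")       # single key, returns the Tags.id
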
    def query_tags(self, project_id=0, job_id=0, update_registry=False):
        """
        query a list of tag keys used in a project or job.

        optionally, the local registry can be updated with the results of the query.
        this should be done if the database is read-only and the client does not know the tag names.
        see the class description for a description of the registry.

        @note this method returns the tags that are used with jobs in the database.
        if you have registered additional tags but not attached them to jobs,
        this method will _not_ list them.

        @param project_id: project identifier.
        by default, the current project self.project_id is used.

        @param job_id: job identifier.
        by default, all jobs of the selected project are included in the query.
        if a job is specified, the project_id parameter is ignored.

        @param update_registry: update the local tags registry (self._tags)
        with the query results.

        @return: dictionary of tags.
        the keys are the tag names, the values are the tag ids in the database.
        """
        if project_id == 0:
            project_id = self.project_id
        if job_id == 0:
            sql = self.sql_select_tag_project
            args = {'project_id': project_id}
        else:
            sql = self.sql_select_tag_job
            args = {'job_id': job_id}

        tags = {}
        self.check_connection()
        with self._lock, self._conn:
            c = self._conn.execute(sql, args)
            for row in c:
                tags[row['key']] = row['tag_id']

        if update_registry:
            self._tags.update(tags)

        return tags

    def query_job_tags(self, job_id):
        """
        query a list of tags (keys and values) associated with a job.

        @param job_id: job identifier.

        @return: dictionary of tags.
        the keys are the tag names, the values are the tag values.
        """
        sql = self.sql_select_jobtag_job
        args = {'job_id': job_id}

        tags = {}
        self.check_connection()
        with self._lock, self._conn:
            c = self._conn.execute(sql, args)
            for row in c:
                tags[row['key']] = row['value']

        return tags

    def insert_jobtags(self, job_id, tags):
        """
        add or update job tags in the database.

        the method updates the JobTags table.

        @param job_id: (int) primary key of the job entry in the Jobs table.
        the entry must exist.

        @param tags: (dict) dictionary containing the tags.
        keys are matched or added to the Tags table,
        values are added to the JobTags table and linked to the job and tag key.

        @return: None
        """
        self.check_connection()
        with self._lock, self._conn:
            for key, value in tags.items():
                try:
                    tag_id = self._tags[key]
                except KeyError:
                    tag_id = self._register_tag(key)
                    v = None
                else:
                    jobtag_entry = {'tag_id': tag_id, 'job_id': job_id, 'value': value}
                    c = self._conn.execute(self.sql_select_jobtag, jobtag_entry)
                    v = c.fetchone()

                if v:
                    jobtag_entry = {'jobtag_id': v[0], 'tag_id': tag_id, 'job_id': job_id, 'value': value}
                    self._conn.execute(self.sql_update_jobtag, jobtag_entry)
                else:
                    jobtag_entry = {'tag_id': tag_id, 'job_id': job_id, 'value': value}
                    self._conn.execute(self.sql_insert_jobtag, jobtag_entry)

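    # illustrative usage sketch (not prescribed by this commit): assuming an open
    # ResultsDatabase instance `db` and an existing job id, the tag names and values
    # below are hypothetical.
    #
    #     db.insert_jobtags(job_id, {"cluster": "trimer", "dataset": "run-042"})
    #     db.query_job_tags(job_id)   # -> {"cluster": "trimer", "dataset": "run-042"}
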
    def create_models_view(self, job_id=0, temporary=False):
        """
        create a flat (pivot) view of model parameters of the current project or job.
@@ -878,7 +1128,7 @@ class ResultsDatabase(object):
        results = c.fetchall()

        names = [desc[0] for desc in c.description]
        dt = np.dtype([(field_to_param(n), field_to_numpy_type(n)) for n in sorted(names)])
        dt = np.dtype([(field_to_param(n), field_to_numpy_type(n)) for n in sorted(names, key=str.lower)])
        out_array = np.zeros((count,), dtype=dt)
        for idx, row in enumerate(results):
            for name in names:
@@ -942,6 +1192,70 @@ class ResultsDatabase(object):

        return out_array

    def query_best_models_per_jobs(self, job_ids=None, task_level='model'):
        """
        return the best model (by rfac) of each selected job

        the query gathers the R-factors of the selected jobs at the selected task levels
        and, for each job, returns the (database) model id where the lowest R-factor is reported
        among the gathered results.

        this can be useful if you want to compile a report of the best model per job.

        @param job_ids: iterable of job ids to include in the query.
        the job ids must belong to the current project.
        if empty or not specified, all jobs of the current project are included.

        @param task_level: element of or index into @ref pmsco.dispatch.CALC_LEVELS.
        deepest task_level to include in the query.
        results on deeper levels are not considered.
        e.g. if you pass 'scan', R-factors of individual scans are included in the query.
        note that including deeper levels will not increase the number of results returned.

        @return: sequence of model_id.
        the number of results corresponds to the number of jobs in the filter scope.
        to find out details of the models, execute another query that filters on these model ids.

        the method produces an SQL query similar to:
        @code{.sql}
        select Models.id from Models
        join Results on Models.id = Results.model_id
        join Jobs on Models.job_id = Jobs.id
        where scan=-1
        and project_id=1
        and job_id in (1,2,3)
        group by Models.job_id
        having min(rfac)
        order by rfac
        @endcode
        """

        try:
            level = dispatch.CALC_LEVELS.index(task_level) + 1
        except ValueError:
            level = task_level + 1
        try:
            level_name = dispatch.CALC_LEVELS[level]
        except IndexError:
            level_name = dispatch.CALC_LEVELS[4]

        self.check_connection()
        with self._lock, self._conn:
            sql = "select Models.id from Models "
            sql += "join Results on Models.id = Results.model_id "
            sql += "join Jobs on Models.job_id = Jobs.id "
            sql += "where project_id = {0} ".format(self.project_id)
            sql += "and {0} = -1 ".format(level_name)
            if job_ids:
                sql += "and Models.job_id in ({0}) ".format(",".join(map(str, job_ids)))
            sql += "group by Models.job_id "
            sql += "having min(rfac) "
            sql += "order by rfac, job_id, model, scan, sym, emit, region "
            c = self._conn.execute(sql)
            models = [row['id'] for row in c]

        return models

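    # illustrative usage sketch (not prescribed by this commit): assuming an open
    # ResultsDatabase instance `db` bound to the current project; the job ids are
    # placeholders.
    #
    #     best = db.query_best_models_per_jobs(job_ids=[1, 2, 3], task_level='scan')
    #     # `best` is a sequence of Models.id values, at most one per selected job
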
    def query_tasks(self, job_id=0):
        """
        query the task index used in a calculation job.
@@ -1213,13 +1527,18 @@ class ResultsDatabase(object):

        data = np.genfromtxt(filename, names=True)
        self.register_params(data.dtype.names)
        unique_models, unique_index = np.unique(data['_model'], True)
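        # (descriptive note) the try/except below falls back to a single pseudo-model
        # (index 0) if data['_model'] raises ValueError, e.g. when the imported file
        # does not contain a _model column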
        try:
            unique_models, unique_index = np.unique(data['_model'], True)
        except ValueError:
            unique_models = np.array([0])
            unique_index = np.array([0])
        unique_data = data[unique_index]
        model_ids = {}

        def model_entry_generator():
            for result in unique_data:
                model_entry = {'job_id': job_id,
                               'model': unique_models[0],
                               'gen': None,
                               'particle': None}
                model_entry.update(special_params(result))
@@ -1227,7 +1546,11 @@ class ResultsDatabase(object):

        def result_entry_generator():
            for result in data:
                result_entry = {'model_id': model_ids[result['_model']],
                try:
                    model = result['_model']
                except ValueError:
                    model = unique_models[0]
                result_entry = {'model_id': model_ids[model],
                                'scan': -1,
                                'sym': -1,
                                'emit': -1,
@@ -1238,8 +1561,12 @@ class ResultsDatabase(object):

        def param_entry_generator():
            for result in unique_data:
                try:
                    model = result['_model']
                except ValueError:
                    model = unique_models[0]
                for key, value in regular_params(result).items():
                    param_entry = {'model_id': model_ids[result['_model']],
                    param_entry = {'model_id': model_ids[model],
                                   'param_id': self._model_params[key],
                                   'value': value}
                    yield param_entry