""" @package tests.test_database unit tests for pmsco.database the purpose of these tests is to help debugging the code. to run the tests, change to the directory which contains the tests directory, and execute =nosetests=. @pre nose must be installed (python-nose package on Debian). @author Matthias Muntwiler, matthias.muntwiler@psi.ch @copyright (c) 2016 by Paul Scherrer Institut @n Licensed under the Apache License, Version 2.0 (the "License"); @n you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 """ from __future__ import absolute_import from __future__ import division from __future__ import print_function import unittest import datetime import os.path import tempfile import shutil import numpy as np import pmsco.database as db import pmsco.dispatch as dispatch import pmsco.optimizers.population as population class TestDatabase(unittest.TestCase): def setUp(self): self.test_dir = tempfile.mkdtemp() self.lock_filename = os.path.join(self.test_dir, "test_database.lock") self.db = db.ResultsDatabase() self.db.connect(":memory:", lock_filename=self.lock_filename) def tearDown(self): self.db.disconnect() shutil.rmtree(self.test_dir) @classmethod def setup_class(cls): # before any methods in this class pass @classmethod def teardown_class(cls): # teardown_class() after any methods in this class pass def test_regular_params(self): d1 = {'parA': 1.234, 'par_B': 5.678, '_model': 91, '_rfac': 0.534} d2 = db.regular_params(d1) d3 = {'parA': d1['parA'], 'par_B': d1['par_B']} self.assertEqual(d2, d3) self.assertIsNot(d2, d1) def test_special_params(self): d1 = {'parA': 1.234, 'par_B': 5.678, '_model': 91, '_rfac': 0.534, '_db_model': 99} d2 = db.special_params(d1) d3 = {'model': d1['_model'], 'rfac': d1['_rfac']} self.assertEqual(d2, d3) self.assertIsNot(d2, d1) dt = [('parA', 'f4'), ('par_B', 'f4'), ('_model', 'i4'), ('_rfac', 'f4'), ('_db_model', 'f4')] arr = np.zeros(1, dtype=dt) for k, v in d1.items(): arr[0][k] = v d4 = db.special_params(arr[0]) self.assertEqual(d4.keys(), d3.keys()) for k in d4: self.assertAlmostEqual(d4[k], d3[k]) cid1 = dispatch.CalcID(1, 2, 3, 4, -1) cid2 = db.special_params(cid1) cid3 = {'model': 1, 'scan': 2, 'domain': 3, 'emit': 4, 'region': -1} self.assertEqual(cid2, cid3) l1 = d1.keys() l2 = db.special_params(l1) l3 = d3.keys() self.assertEqual(list(l2), list(l3)) t1 = tuple(l1) t2 = db.special_params(t1) t3 = tuple(l3) self.assertEqual(t2, t3) def setup_sample_database(self): self.db.register_project("oldproject", "oldcode") self.db.register_project("unittest", "testcode") self.db.register_job(self.db.project_id, "testjob", "testmode", "testhost", None, datetime.datetime.now()) self.ex_model = {'parA': 1.234, 'parB': 5.678, '_model': 91, '_rfac': 0.534} self.db.register_params(self.ex_model) self.db.insert_model(self.ex_model) self.db.create_models_view() def test_register_project(self): id1 = self.db.register_project("unittest1", "Atest") self.assertIsInstance(id1, int) self.assertEqual(id1, self.db.project_id) id2 = self.db.register_project("unittest2", "Btest") self.assertIsInstance(id2, int) self.assertEqual(id2, self.db.project_id) id3 = self.db.register_project("unittest1", "Ctest") self.assertIsInstance(id3, int) self.assertEqual(id3, self.db.project_id) self.assertNotEqual(id1, id2) self.assertEqual(id1, id3) c = self.db._conn.cursor() c.execute("select count(*) from Projects") count = c.fetchone() self.assertEqual(count[0], 2) c.execute("select name, code from Projects where id=:id", {'id': id1}) row = c.fetchone() self.assertIsNotNone(row) self.assertEqual(len(row), 2) self.assertEqual(row[0], "unittest1") self.assertEqual(row[1], "Atest") self.assertEqual(row['name'], "unittest1") self.assertEqual(row['code'], "Atest") def test_register_job(self): pid1 = self.db.register_project("unittest1", "Acode") pid2 = self.db.register_project("unittest2", "Bcode") dt1 = datetime.datetime.now() # insert new job id1 = self.db.register_job(pid1, "Ajob", "Amode", "local", "Ahash", dt1, "Adesc") self.assertIsInstance(id1, int) self.assertEqual(id1, self.db.job_id) # insert another job id2 = self.db.register_job(pid1, "Bjob", "Amode", "local", "Ahash", dt1, "Adesc") self.assertIsInstance(id2, int) self.assertEqual(id2, self.db.job_id) # update first job id3 = self.db.register_job(pid1, "Ajob", "Cmode", "local", "Chash", dt1, "Cdesc") self.assertIsInstance(id3, int) self.assertEqual(id3, self.db.job_id) # insert another job with same name but in other project id4 = self.db.register_job(pid2, "Ajob", "Dmode", "local", "Dhash", dt1, "Ddesc") self.assertIsInstance(id4, int) self.assertEqual(id4, self.db.job_id) self.assertNotEqual(id1, id2) self.assertEqual(id1, id3) self.assertNotEqual(id1, id4) c = self.db._conn.cursor() c.execute("select count(*) from Jobs") count = c.fetchone() self.assertEqual(count[0], 3) c.execute("select name, mode, machine, git_hash, datetime, description from Jobs where id=:id", {'id': id1}) row = c.fetchone() self.assertIsNotNone(row) self.assertEqual(len(row), 6) self.assertEqual(row[0], "Ajob") self.assertEqual(row[1], "Amode") self.assertEqual(row['machine'], "local") self.assertEqual(str(row['datetime']), str(dt1)) self.assertEqual(row['git_hash'], "Ahash") self.assertEqual(row['description'], "Adesc") def test_register_params(self): self.setup_sample_database() model5 = {'parA': 2.341, 'parC': 6.785, '_model': 92, '_rfac': 0.453} self.db.register_params(model5) expected = ['parA', 'parB', 'parC'] c = self.db._conn.cursor() c.execute("select * from Params order by key") results = c.fetchall() self.assertEqual(len(results), 3) result_params = [row['key'] for row in results] self.assertEqual(result_params, expected) def test_query_project_params(self): self.setup_sample_database() project1 = self.db.project_id self.db.register_project("unittest2", "testcode2") self.db.register_job(self.db.project_id, "testjob2", "test", "localhost", None, datetime.datetime.now()) model5 = {'parA': 2.341, 'parC': 6.785, '_model': 92, '_rfac': 0.453} self.db.register_params(model5) self.db.insert_model(model5) results = self.db.query_project_params(project_id=project1) expected = ['parA', 'parB'] self.assertEqual(expected, sorted(list(results.keys()))) def test_insert_model(self): self.setup_sample_database() c = self.db._conn.cursor() c.execute("select count(*) from Models") count = c.fetchone() self.assertEqual(count[0], 1) c.execute("select * from Models") row = c.fetchone() model_id = row['id'] self.assertIsInstance(model_id, int) self.assertEqual(row['job_id'], self.db.job_id) self.assertEqual(row['model'], self.ex_model['_model']) self.assertIsNone(row['gen']) self.assertIsNone(row['particle']) sql = "select key, value from ParamValues " + \ "join Params on ParamValues.param_id = Params.id " + \ "where model_id = :model_id" c.execute(sql, {'model_id': model_id}) result = c.fetchall() # list of Row objects self.assertEqual(len(result), 2) for row in result: self.assertAlmostEqual(row['value'], self.ex_model[row['key']]) def test_query_model(self): self.setup_sample_database() c = self.db._conn.cursor() c.execute("select * from Models") row = c.fetchone() model_id = row['id'] model = self.db.query_model(model_id) del self.ex_model['_model'] del self.ex_model['_rfac'] self.assertEqual(model, self.ex_model) def test_query_model_array(self): self.setup_sample_database() index = {'_scan': -1, '_domain': -1, '_emit': -1, '_region': -1} model2 = {'parA': 4.123, 'parB': 8.567, '_model': 92, '_rfac': 0.654} model3 = {'parA': 3.412, 'parB': 7.856, '_model': 93, '_rfac': 0.345} model4 = {'parA': 4.123, 'parB': 8.567, '_model': 94, '_rfac': 0.354} model5 = {'parA': 2.341, 'parC': 6.785, '_model': 95, '_rfac': 0.453} model6 = {'parA': 4.123, 'parB': 8.567, '_model': 96, '_rfac': 0.354} self.db.register_params(model5) self.db.create_models_view() model2.update(index) model3.update(index) model4.update(index) model5.update(index) model6.update(index) self.db.insert_result(model2, model2) self.db.insert_result(model3, model3) self.db.insert_result(model4, model4) self.db.insert_result(model5, model5) self.db.insert_result(model6, model6) # only model3, model4 and model5 fulfill all conditions and limits fil = ['mode = "testmode"', 'rfac <= 0.6'] lim = 3 result = self.db.query_model_array(filter=fil, limit=lim) template = ['parA', 'parB', 'parC', '_model', '_rfac', '_gen', '_particle'] dt = population.Population.get_pop_dtype(template) expected = np.zeros((lim,), dtype=dt) expected['parA'] = np.array([3.412, 4.123, 2.341]) expected['parB'] = np.array([7.856, 8.567, None]) expected['parC'] = np.array([None, None, 6.785]) expected['_model'] = np.array([93, 94, 95]) expected['_rfac'] = np.array([0.345, 0.354, 0.453]) expected['_gen'] = np.array([0, 0, 0]) expected['_particle'] = np.array([0, 0, 0]) self.assertEqual(result.shape, expected.shape) np.testing.assert_array_almost_equal(result['parA'], expected['parA']) np.testing.assert_array_almost_equal(result['parB'], expected['parB']) np.testing.assert_array_almost_equal(result['parC'], expected['parC']) np.testing.assert_array_almost_equal(result['_model'], expected['_model']) np.testing.assert_array_almost_equal(result['_gen'], expected['_gen']) np.testing.assert_array_almost_equal(result['_particle'], expected['_particle']) def test_query_best_results(self): self.setup_sample_database() model2 = {'parA': 4.123, 'parB': 8.567, '_model': 92, '_rfac': 0.654, '_gen': 1, '_particle': 2} model3 = {'parA': 3.412, 'parB': 7.856, '_model': 93, '_rfac': 0.345, '_gen': 1, '_particle': 3} model4 = {'parA': 4.123, 'parB': 8.567, '_model': 94, '_rfac': 0.354, '_gen': 1, '_particle': 4} model5 = {'parA': 2.341, 'parC': 6.785, '_model': 95, '_rfac': 0.453, '_gen': 1, '_particle': 5} model6 = {'parA': 4.123, 'parB': 8.567, '_model': 96, '_rfac': 0.354, '_gen': 1, '_particle': 6} model7 = {'parA': 5.123, 'parB': 6.567, '_model': 97, '_rfac': 0.154, '_gen': 1, '_particle': 7} self.db.register_params(model5) self.db.create_models_view() model2.update({'_scan': -1, '_domain': 11, '_emit': 21, '_region': 31}) model3.update({'_scan': 1, '_domain': 12, '_emit': 22, '_region': 32}) model4.update({'_scan': 2, '_domain': 11, '_emit': 23, '_region': 33}) model5.update({'_scan': 3, '_domain': 11, '_emit': 24, '_region': 34}) model6.update({'_scan': 4, '_domain': 11, '_emit': 25, '_region': 35}) model7.update({'_scan': 5, '_domain': -1, '_emit': -1, '_region': -1}) self.db.insert_result(model2, model2) self.db.insert_result(model3, model3) self.db.insert_result(model4, model4) self.db.insert_result(model5, model5) self.db.insert_result(model6, model6) self.db.insert_result(model7, model7) # only model3, model4 and model5 fulfill all conditions and limits fil = ['mode = "testmode"', 'domain = 11'] lim = 3 result = self.db.query_best_results(filter=fil, limit=lim) ifields = ['_db_job', '_db_model', '_db_result', '_model', '_scan', '_domain', '_emit', '_region', '_gen', '_particle'] ffields = ['_rfac'] dt = [(f, 'i8') for f in ifields] dt.extend([(f, 'f8') for f in ffields]) expected = np.zeros((lim,), dtype=dt) expected['_rfac'] = np.array([0.354, 0.354, 0.453]) expected['_model'] = np.array([94, 96, 95]) expected['_scan'] = np.array([2, 4, 3]) expected['_domain'] = np.array([11, 11, 11]) expected['_emit'] = np.array([23, 25, 24]) expected['_region'] = np.array([33, 35, 34]) expected['_gen'] = np.array([1, 1, 1]) expected['_particle'] = np.array([4, 6, 5]) self.assertEqual(result.shape, expected.shape) np.testing.assert_array_almost_equal(result['_rfac'], expected['_rfac']) np.testing.assert_array_equal(result['_model'], expected['_model']) np.testing.assert_array_equal(result['_scan'], expected['_scan']) np.testing.assert_array_equal(result['_domain'], expected['_domain']) np.testing.assert_array_equal(result['_emit'], expected['_emit']) np.testing.assert_array_equal(result['_region'], expected['_region']) np.testing.assert_array_equal(result['_gen'], expected['_gen']) np.testing.assert_array_equal(result['_particle'], expected['_particle']) def test_insert_result(self): self.setup_sample_database() index = dispatch.CalcID(15, 16, 17, 18, -1) result = {'parA': 4.123, 'parB': 8.567, '_rfac': 0.654, '_particle': 21} result_id = self.db.insert_result(index, result) c = self.db._conn.cursor() c.execute("select count(*) from Results") count = c.fetchone() self.assertEqual(count[0], 1) c.execute("select * from Results") row = c.fetchone() self.assertIsInstance(row['id'], int) self.assertEqual(row['id'], result_id) model_id = row['model_id'] self.assertIsInstance(model_id, int) self.assertEqual(row['scan'], index.scan) self.assertEqual(row['domain'], index.domain) self.assertEqual(row['emit'], index.emit) self.assertEqual(row['region'], index.region) self.assertEqual(row['rfac'], result['_rfac']) c.execute("select * from Models where id = :model_id", {'model_id': model_id}) row = c.fetchone() model_id = row['id'] self.assertIsInstance(model_id, int) self.assertEqual(row['job_id'], self.db.job_id) self.assertEqual(row['model'], index.model) self.assertIsNone(row['gen']) self.assertEqual(row['particle'], result['_particle']) sql = "select key, value from ParamValues " + \ "join Params on ParamValues.param_id = Params.id " + \ "where model_id = :model_id" c.execute(sql, {'model_id': model_id}) rows = c.fetchall() # list of Row objects self.assertEqual(len(rows), 2) for row in rows: self.assertAlmostEqual(row['value'], result[row['key']]) def test_update_result(self): self.setup_sample_database() index = dispatch.CalcID(15, 16, 17, 18, -1) result1 = {'parA': 4.123, 'parB': 8.567, '_rfac': 0.654, '_particle': 21} result_id1 = self.db.insert_result(index, result1) result2 = {'parA': 5.456, '_rfac': 0.254, '_particle': 11} result_id2 = self.db.insert_result(index, result2) result3 = result1.copy() result3.update(result2) self.assertEqual(result_id1, result_id2) c = self.db._conn.cursor() c.execute("select count(*) from Results") count = c.fetchone() self.assertEqual(count[0], 1) c.execute("select * from Results") row = c.fetchone() self.assertIsInstance(row['id'], int) self.assertEqual(row['id'], result_id1) model_id = row['model_id'] self.assertIsInstance(model_id, int) self.assertEqual(row['scan'], index.scan) self.assertEqual(row['domain'], index.domain) self.assertEqual(row['emit'], index.emit) self.assertEqual(row['region'], index.region) self.assertEqual(row['rfac'], result2['_rfac']) c.execute("select * from Models where id = :model_id", {'model_id': model_id}) row = c.fetchone() model_id = row['id'] self.assertIsInstance(model_id, int) self.assertEqual(row['job_id'], self.db.job_id) self.assertEqual(row['model'], index.model) self.assertIsNone(row['gen']) self.assertEqual(row['particle'], result2['_particle']) sql = "select key, value from ParamValues " + \ "join Params on ParamValues.param_id = Params.id " + \ "where model_id = :model_id" c.execute(sql, {'model_id': model_id}) rows = c.fetchall() # list of Row objects self.assertEqual(len(rows), 2) for row in rows: self.assertAlmostEqual(row['value'], result3[row['key']]) def test_update_result_dict(self): """ test update result with index as dictionary @return: """ self.setup_sample_database() index = {'_model': 15, '_scan': 16, '_domain': 17, '_emit': 18, '_region': -1} result1 = {'parA': 4.123, 'parB': 8.567, '_rfac': 0.654, '_particle': 21} result_id1 = self.db.insert_result(index, result1) result2 = {'parA': 5.456, '_rfac': 0.254, '_particle': 11} result_id2 = self.db.insert_result(index, result2) result3 = result1.copy() result3.update(result2) self.assertEqual(result_id1, result_id2) c = self.db._conn.cursor() c.execute("select count(*) from Results") count = c.fetchone() self.assertEqual(count[0], 1) c.execute("select * from Results") row = c.fetchone() self.assertIsInstance(row['id'], int) self.assertEqual(row['id'], result_id1) model_id = row['model_id'] self.assertIsInstance(model_id, int) self.assertEqual(row['scan'], index['_scan']) self.assertEqual(row['domain'], index['_domain']) self.assertEqual(row['emit'], index['_emit']) self.assertEqual(row['region'], index['_region']) self.assertEqual(row['rfac'], result2['_rfac']) c.execute("select * from Models where id = :model_id", {'model_id': model_id}) row = c.fetchone() model_id = row['id'] self.assertIsInstance(model_id, int) self.assertEqual(row['job_id'], self.db.job_id) self.assertEqual(row['model'], index['_model']) self.assertIsNone(row['gen']) self.assertEqual(row['particle'], result2['_particle']) sql = "select key, value from ParamValues " + \ "join Params on ParamValues.param_id = Params.id " + \ "where model_id = :model_id" c.execute(sql, {'model_id': model_id}) rows = c.fetchall() # list of Row objects self.assertEqual(len(rows), 2) for row in rows: self.assertAlmostEqual(row['value'], result3[row['key']]) def test_query_best_task_models(self): self.setup_sample_database() model0xxx = {'_model': 0, '_scan': -1, '_domain': -1, '_emit': -1, '_region': -1, 'parA': 4., 'parB': 8.567, '_rfac': 0.01} model00xx = {'_model': 1, '_scan': 0, '_domain': -1, '_emit': -1, '_region': -1, 'parA': 4., 'parB': 8.567, '_rfac': 0.02} model000x = {'_model': 2, '_scan': 0, '_domain': 0, '_emit': -1, '_region': -1, 'parA': 4., 'parB': 8.567, '_rfac': 0.03} model01xx = {'_model': 3, '_scan': 1, '_domain': -1, '_emit': -1, '_region': -1, 'parA': 4., 'parB': 8.567, '_rfac': 0.04} model010x = {'_model': 4, '_scan': 1, '_domain': 0, '_emit': -1, '_region': -1, 'parA': 4., 'parB': 8.567, '_rfac': 0.05} model1xxx = {'_model': 5, '_scan': -1, '_domain': -1, '_emit': -1, '_region': -1, 'parA': 4.123, 'parB': 8.567, '_rfac': 0.09} model10xx = {'_model': 6, '_scan': 0, '_domain': -1, '_emit': -1, '_region': -1, 'parA': 4.123, 'parB': 8.567, '_rfac': 0.08} model100x = {'_model': 7, '_scan': 0, '_domain': 0, '_emit': -1, '_region': -1, 'parA': 4.123, 'parB': 8.567, '_rfac': 0.07} model11xx = {'_model': 8, '_scan': 1, '_domain': -1, '_emit': -1, '_region': -1, 'parA': 4.123, 'parB': 8.567, '_rfac': 0.06} model110x = {'_model': 9, '_scan': 1, '_domain': 0, '_emit': -1, '_region': -1, 'parA': 4.123, 'parB': 8.567, '_rfac': 0.05} model2xxx = {'_model': 10, '_scan': -1, '_domain': -1, '_emit': -1, '_region': -1, 'parA': 4.123, 'parB': 8.567, '_rfac': 0.01} self.db.insert_result(model0xxx, model0xxx) self.db.insert_result(model00xx, model00xx) self.db.insert_result(model000x, model000x) self.db.insert_result(model01xx, model01xx) self.db.insert_result(model010x, model010x) self.db.insert_result(model1xxx, model1xxx) self.db.insert_result(model10xx, model10xx) self.db.insert_result(model100x, model100x) self.db.insert_result(model11xx, model11xx) self.db.insert_result(model110x, model110x) self.db.insert_result(model2xxx, model2xxx) result = self.db.query_best_task_models(level=1, count=2) expected = {0, 1, 3, 6, 8, 10} self.assertEqual(result, expected) def test_sample_project(self): """ test ingestion of two results this test uses the same call sequence as the actual pmsco code. it has been used to debug a problem in the main code where prevous results were overwritten. """ db_filename = os.path.join(self.test_dir, "sample_database.db") lock_filename = os.path.join(self.test_dir, "sample_database.lock") # project project_name = self.__class__.__name__ project_module = self.__class__.__module__ # job 1 job_name1 = "job1" result1 = {'parA': 1.234, 'parB': 5.678, '_model': 91, '_rfac': 0.534} task1 = dispatch.CalcID(91, -1, -1, -1, -1) # ingest job 1 _db = db.ResultsDatabase() _db.connect(db_filename, lock_filename=lock_filename) project_id1 = _db.register_project(project_name, project_module) job_id1 = _db.register_job(project_id1, job_name1, "test", "localhost", "", datetime.datetime.now(), "") # _db.insert_jobtags(job_id, self.job_tags) _db.register_params(result1.keys()) _db.create_models_view() result_id1 = _db.insert_result(task1, result1) _db.disconnect() # job 2 job_name2 = "job2" result2 = {'parA': 1.345, 'parB': 5.789, '_model': 91, '_rfac': 0.654} task2 = dispatch.CalcID(91, -1, -1, -1, -1) # ingest job 2 _db = db.ResultsDatabase() _db.connect(db_filename, lock_filename=lock_filename) project_id2 = _db.register_project(project_name, project_module) job_id2 = _db.register_job(project_id2, job_name2, "test", "localhost", "", datetime.datetime.now(), "") # _db.insert_jobtags(job_id, self.job_tags) _db.register_params(result2.keys()) _db.create_models_view() result_id2 = _db.insert_result(task2, result2) _db.disconnect() # check jobs _db = db.ResultsDatabase() _db.connect(db_filename, lock_filename=lock_filename) sql = "select * from Jobs " c = _db._conn.execute(sql) rows = c.fetchall() self.assertEqual(len(rows), 2) # check models sql = "select * from Models " c = _db._conn.execute(sql) rows = c.fetchall() self.assertEqual(len(rows), 2) # check results sql = "select * from Results " c = _db._conn.execute(sql) rows = c.fetchall() self.assertEqual(len(rows), 2) _db.disconnect() if __name__ == '__main__': unittest.main()