diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml new file mode 100644 index 0000000..ab13e31 --- /dev/null +++ b/.gitlab-ci.yml @@ -0,0 +1,13 @@ +pages: + stage: deploy + script: + - make docs + - mv docs/html/ public/ + artifacts: + paths: + - public + only: + - master + tags: + - doxygen + \ No newline at end of file diff --git a/CHANGES.md b/CHANGES.md new file mode 100644 index 0000000..6003127 --- /dev/null +++ b/CHANGES.md @@ -0,0 +1,34 @@ +Release 2.2.0 (2020-09-04) +========================== + +| Hash | Date | Description | +| ---- | ---- | ----------- | +| 4bb2331 | 2020-07-30 | demo project for arbitrary molecule (cluster file) | +| f984f64 | 2020-09-03 | bugfix: DATA CORRUPTION in phagen translator (emitter mix-up) | +| 11fb849 | 2020-09-02 | bugfix: load native cluster file: wrong column order | +| d071c97 | 2020-09-01 | bugfix: initial-state command line option not respected | +| 9705eed | 2020-07-28 | photoionization cross sections and spectrum simulator | +| 98312f0 | 2020-06-12 | database: use local lock objects | +| c8fb974 | 2020-04-30 | database: create view on results and models | +| 2cfebcb | 2020-05-14 | REFACTORING: Domain -> ModelSpace, Params -> CalculatorParams | +| d5516ae | 2020-05-14 | REFACTORING: symmetry -> domain | +| b2dd21b | 2020-05-13 | possible conda/mpi4py conflict - changed installation procedure | +| cf5c7fd | 2020-05-12 | cluster: new calc_scattering_angles function | +| 20df82d | 2020-05-07 | include a periodic table of binding energies of the elements | +| 5d560bf | 2020-04-24 | clean up files in the main loop and in the end | +| 6e0ade5 | 2020-04-24 | bugfix: database ingestion overwrites results from previous jobs | +| 263b220 | 2020-04-24 | time out at least 10 minutes before the hard time limit given on the command line | +| 4ec526d | 2020-04-09 | cluster: new get_center function | +| fcdef4f | 2020-04-09 | bugfix: type error in grid optimizer | +| a4d1cf7 | 2020-03-05 | bugfix: file extension in phagen/makefile | +| 9461e46 | 2019-09-11 | dispatch: new algo to distribute processing slots to task levels | +| 30851ea | 2020-03-04 | bugfix: load single-line data files correctly! | +| 71fe0c6 | 2019-10-04 | cluster generator for zincblende crystal | +| 23965e3 | 2020-02-26 | phagen translator: fix phase convention (MAJOR), fix single-energy | +| cf1814f | 2019-09-11 | dispatch: give more priority to mid-level tasks in single mode | +| 58c778d | 2019-09-05 | improve performance of cluster add_bulk, add_layer and rotate | +| 20ef1af | 2019-09-05 | unit test for Cluster.translate, bugfix in translate and relax | +| 0b80850 | 2019-07-17 | fix compatibility with numpy >= 1.14, require numpy >= 1.13 | +| 1d0a542 | 2019-07-16 | database: introduce job-tags | +| 8461d81 | 2019-07-05 | qpmsco: delete code after execution | + diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..5cb1657 --- /dev/null +++ b/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "{}" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright 2015-2020 Paul Scherrer Institut + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/README.md b/README.md index 9cc1c51..c82e50c 100644 --- a/README.md +++ b/README.md @@ -16,7 +16,7 @@ Highlights - angle or energy scanned XPD. - various scanning modes including energy, polar angle, azimuthal angle, analyser angle. -- averaging over multiple symmetries (domains or emitters). +- averaging over multiple domains and emitters. - global optimization of multiple scans. - structural optimization algorithms: particle swarm optimization, grid search, gradient search. - calculation of the modulation function. @@ -38,6 +38,12 @@ Detailed installation instructions and dependencies can be found in the document (docs/src/installation.dox). A [Doxygen](http://www.stack.nl/~dimitri/doxygen/index.html) compiler with Doxypy is required to generate the documentation in HTML or LaTeX format. +The easiest way to set up an environment with all dependencies and without side-effects on other installed software is to use a [Singularity](https://www.sylabs.io/guides/2.5/user-guide/index.html) container. +A Singularity recipe file is part of the distribution, see the PMSCO documentation for details. +On newer Linux systems (e.g. Ubuntu 18.04), Singularity is available from the package manager. +Installation in a [virtual box](https://www.virtualbox.org/) on Windows or Mac is straightforward using the [Vagrant](https://www.vagrantup.com/) system. +A Vagrant file is included in the distribution. + The public distribution of PMSCO does not contain the [EDAC](http://garciadeabajos-group.icfo.es/widgets/edac/) code. Please obtain the EDAC source code from the original author, copy it to the pmsco/edac directory, and apply the edac_all.patch patch. @@ -61,7 +67,7 @@ Matthias Muntwiler, Copyright --------- -Copyright 2015-2018 by [Paul Scherrer Institut](http://www.psi.ch) +Copyright 2015-2020 by [Paul Scherrer Institut](http://www.psi.ch) Release Notes diff --git a/docs/src/commandline.dox b/docs/src/commandline.dox index f29182f..9a953e5 100644 --- a/docs/src/commandline.dox +++ b/docs/src/commandline.dox @@ -60,7 +60,7 @@ Multiple names can be specified and must be separated by spaces. | debug | debug files | delete | | model | output files in ETPAI format: complete simulation (a_-1_-1_-1_-1) | keep | | scan | output files in ETPAI format: scan (a_b_-1_-1_-1) | keep | -| symmetry | output files in ETPAI format: symmetry (a_b_c_-1_-1) | delete | +| domain | output files in ETPAI format: domain (a_b_c_-1_-1) | delete | | emitter | output files in ETPAI format: emitter (a_b_c_d_-1) | delete | | region | output files in ETPAI format: region (a_b_c_d_e) | delete | | report| final report of results | keep always | diff --git a/docs/src/concepts-emitter.dox b/docs/src/concepts-emitter.dox index 399f457..2c798e1 100644 --- a/docs/src/concepts-emitter.dox +++ b/docs/src/concepts-emitter.dox @@ -105,12 +105,12 @@ is assigned to the project's cluster_generator attribute. 1. Implement a count_emitters method in your project class if the project uses more than one emitter configurations. It must have same method contract as pmsco.cluster.ClusterGenerator.count_emitters. - Specifically, it must return the number of emitter configurations of a given model, scan and symmetry. + Specifically, it must return the number of emitter configurations of a given model, scan and domain. If there is only one configuration, the method does not need to be implemented. 2. Implement a create_cluster method in your project class. It must have same method contract as pmsco.cluster.ClusterGenerator.create_cluster. - Specifically, it must return a cluster.Cluster object for the given model, scan, symmetry and emitter configuration. + Specifically, it must return a cluster.Cluster object for the given model, scan, domain and emitter configuration. The emitter atoms must be marked according to the emitter configuration specified by the index argument. Note that, depending on the index.emit argument, all emitter atoms must be marked or only the ones of the corresponding emitter configuration. diff --git a/docs/src/concepts-symmetry.dox b/docs/src/concepts-symmetry.dox index 4b5c710..316bc95 100644 --- a/docs/src/concepts-symmetry.dox +++ b/docs/src/concepts-symmetry.dox @@ -1,32 +1,32 @@ -/*! @page pag_concepts_symmetry Symmetry +/*! @page pag_concepts_domain Domain -\section sec_symmetry Symmetry and Domain Averaging +\section sec_domain Domain Averaging -A _symmetry_ under PMSCO is a discrete variant of a set of calculation parameters (including the atomic cluster) +A _domain_ under PMSCO is a discrete variant of a set of calculation parameters (including the atomic cluster) that is derived from the same set of model parameters and that contributes incoherently to the measured diffraction pattern. -A symmetry may be represented by a special symmetry parameter which is not subject to optimization. +A domain may be represented by special domain parameters that are not subject to optimization. -For instance, a real sample may have additional rotational domains that are not present in the cluster, -increasing the symmetry from three-fold to six-fold. +For instance, a real sample may have rotational domains that are not present in the cluster, +changing the symmetry from three-fold to six-fold. Or, an adsorbate may be present in a number of different lateral configurations on the substrate. In the first case, it may be sufficient to fold calculated data in the proper way to generate the same symmetry as in the measurement. In the latter case, it may be necessary to execute a scattering calculation for each possible orientation or a representative number of possible orientations. -PMSCO provides the basic framework to spawn multiple calculations according to the number of symmetries (cf. \ref sec_tasks). -The actual data reduction from multiple symmetries to one measurement needs to be implemented on the project level. +PMSCO provides the basic framework to spawn multiple calculations according to the number of domains (cf. \ref sec_tasks). +The actual data reduction from multiple domain to one measurement needs to be implemented on the project level. This section explains the necessary steps. -1. Your project needs to populate the pmsco.project.Project.symmetries list. - For each symmetry, add a dictionary of symmetry parameters, e.g. {'angle_azi': 15.0}. - There must be at least one symmetry in a project, otherwise no calculation is executed. +1. Your project needs to populate the pmsco.project.Project.domains list. + For each domain, add a dictionary of domain parameters, e.g. {'angle_azi': 15.0}. + At least one domain must be declared in a project, otherwise no calculation is executed. -2. The project may apply the symmetry of a task to the cluster and parameter file if necessary. - The pmsco.project.Project.create_cluster and pmsco.project.Project.create_params methods receive the index of the particular symmetry in addition to the model parameters. - -3. The project combines the results of the calculations for the various symmetries into one dataset that can be compared to the measurement. - The default method implemented in pmsco.project.Project just adds up all calculations with equal weight. - If you need more control, you need to override the pmsco.project.Project.combine_symmetries method and implement your own algorithm. +2. The project may use the domain index of a task to build the cluster and parameter file as necessary. + The pmsco.project.Project.create_cluster and pmsco.project.Project.create_params methods receive the index of the particular domain in addition to the model parameters. +3. The project combines the results of the calculations for the various domains into one dataset that can be compared to the measurement. + The default method implemented in pmsco.project.Project just adds up all calculations with customizable weight. + It uses the special model parameters `wdom1`, `wdom2`, ... (if defined, default 1) to weight each domain. + If you need more control, override the pmsco.project.Project.combine_domains method and implement your own algorithm. */ diff --git a/docs/src/concepts-tasks.dox b/docs/src/concepts-tasks.dox index af9cbb7..7e08bf3 100644 --- a/docs/src/concepts-tasks.dox +++ b/docs/src/concepts-tasks.dox @@ -12,7 +12,7 @@ mandated by the project but also efficient calculations in a multi-process envir A concrete set of parameters is called @ref sec_task_model. 2. The sample was measured multiple times or under different conditions (initial states, photon energy, emission angle). Each contiguous measured dataset is called a @ref sec_task_scan. -3. The measurement averages over multiple inequivalent domains, cf. @ref sec_task_symmetry. +3. The measurement averages over multiple inequivalent domains, cf. @ref sec_task_domain. 4. The measurement includes multiple geometrically inequivalent emitters, cf. @ref sec_task_emitter. 5. The calculation should be distributed over multiple processes that run in parallel to reduce the wall time, cf. @ref sec_task_region. @@ -24,7 +24,7 @@ as shown schematically in the following diagram. class CalculationTask { model scan -symmetry +domain emitter region .. @@ -55,7 +55,7 @@ class Scan { alphas } -class Symmetry { +class Domain { index .. rotation @@ -75,13 +75,13 @@ class Region { CalculationTask *-- Model CalculationTask *-- Scan -CalculationTask *-- Symmetry +CalculationTask *-- Domain CalculationTask *-- Emitter CalculationTask *-- Region class Project { scans - symmetries + domains model_handler cluster_generator } @@ -98,7 +98,7 @@ class ModelHandler { Model ..> ModelHandler Scan ..> Project -Symmetry ..> Project +Domain ..> Project Emitter ..> ClusterGenerator Region ..> Project @@ -141,29 +141,29 @@ PMSCO runs a separate calculation for each scan file and compares the combined r This is sometimes called a _global fit_. -\subsection sec_task_symmetry Symmetry +\subsection sec_task_domain Domain -A _symmetry_ is a discrete variant of a set of calculation parameters (including the atomic cluster) +A _domain_ is a discrete variant of a set of calculation parameters (including the atomic cluster) that is independent of the _model_ and contributes incoherently to the measured diffraction pattern. For instance, for a system that includes two inequivalent structural domains, two separate clusters have to be generated and calculated for each model. -The symmetry parameter is not subject to optimization. +The domain parameter is not subject to optimization. However, if the branching ratio is unknown a priori, a model parameter can be introduced -to control the relative contribution of a particular symmetry to the diffraction pattern. -In that case, the @ref pmsco.project.Project.combine_symmetries method must be overridden. +to control the relative contribution of a particular domain to the diffraction pattern. +The basic @ref pmsco.project.Project.combine_domains method reads the special model parameters `wdom1`, `wdom2`, etc. to weight the individual domains. -A symmetry is identified by its index which is an index into the project's symmetries table (pmsco.project.Project.symmetries). -It is up to the user project to give a physical description of the symmetry, e.g. a rotation angle, -by assigning a meaningful value (e.g. a dictionary with key-value pairs) to the symmetries table. +A domain is identified by its index which is an index into the project's domains table (pmsco.project.Project.domains). +It is up to the user project to give a physical description of the domain, e.g. a rotation angle, +by assigning a meaningful value (e.g. a dictionary with key-value pairs) to the domains table. The cluster generator can then read the value from the table rather than from constants in the code. -The figure shows two examples of symmetry parameters. -The corresponding symmetry table could be set up like this: +The figure shows two examples of domain parameters. +The corresponding domains table could be set up like this: @code{.py} -project.add_symmetry = {'rotation': 0.0, 'registry': 0.0} -project.add_symmetry = {'rotation': 30.0, 'registry': 0.0} +project.add_domain({'rotation': 0.0, 'registry': 0.0}) +project.add_domain({'rotation': 30.0, 'registry': 0.0}) @endcode @@ -173,9 +173,9 @@ The _emitter_ component of the calculation task selects a specific emitter confi This is merely an index whose interpretation is up to the cluster generator. The default emitter handler enumerates the emitter index from 1 to the emitter count reported by the cluster generator. -The emitter count and list of emitters may depend on model, scan and symmetry. +The emitter count and list of emitters may depend on model, scan and domain. -The cluster generator can tailor a cluster to the given model, scan, symmetry and emitter index. +The cluster generator can tailor a cluster to the given model, scan, domain and emitter index. For example, in a large unit cell with many inequivalent emitters, the generator might return a small sub-cluster around the actual emitter for better calculation performance since the distant atoms of the unit cell do not contribute to the diffraction pattern. @@ -237,20 +237,20 @@ scan object ScanHandler -object "Sym: CalculationTask" as Sym { +object "Domain: CalculationTask" as Domain { index = (i,j,k,-1,-1) model scan -symmetry +domain } -object "SymmetryHandler" as SymHandler +object "DomainHandler" as DomainHandler object "Emitter: CalculationTask" as Emitter { index = (i,j,k,l,-1) model scan -symmetry +domain emitter } @@ -260,7 +260,7 @@ object "Region: CalculationTask" as Region { index = (i,j,k,l,m) model scan -symmetry +domain emitter region } @@ -270,14 +270,14 @@ object RegionHandler Root "1" o.. "1..*" Model Model "1" o.. "1..*" Scan -Scan "1" o.. "1..*" Sym -Sym "1" o.. "1..*" Emitter +Scan "1" o.. "1..*" Domain +Domain "1" o.. "1..*" Emitter Emitter "1" o.. "1..*" Region (Root, Model) .. ModelHandler (Model, Scan) .. ScanHandler -(Scan, Sym) .. SymHandler -(Sym, Emitter) .. EmitterHandler +(Scan, Domain) .. DomainHandler +(Domain, Emitter) .. EmitterHandler (Emitter, Region) .. RegionHandler @enduml @@ -293,7 +293,7 @@ and the tasks are passed back through the task handler stack. In this phase, each level joins the datasets from the sub-tasks to the data requested by the parent task. For example, at the lowest level, one result file is present for each region. The region handler gathers all files that correspond to the same parent task -(i.e. have the same emitter, symmetry, scan and model attributes), +(i.e. have the same emitter, domain, scan and model attributes), joins them to one file which includes all regions, links the file to the parent task and passes the result to the next higher level. diff --git a/docs/src/dataflow.dot b/docs/src/dataflow.dot index 4a53ae5..34927c8 100644 --- a/docs/src/dataflow.dot +++ b/docs/src/dataflow.dot @@ -10,7 +10,7 @@ digraph G { create_params; calc_modf; calc_rfac; - comb_syms; + comb_doms; comb_scans; } */ @@ -24,11 +24,11 @@ digraph G { model_handler -> model_creator [constraint=false, label="optimize"]; } - subgraph cluster_symmetry { - label = "symmetry handler"; + subgraph cluster_domain { + label = "domain handler"; rank = same; - sym_creator [label="expand models", group=creators]; - sym_handler [label="combine symmetries", group=handlers]; + dom_creator [label="expand models", group=creators]; + dom_handler [label="combine domains", group=handlers]; } subgraph cluster_scan { @@ -47,15 +47,15 @@ digraph G { calculator [label="calculator (EDAC)", shape=box]; - model_creator -> sym_creator [label="model", style=bold]; - sym_creator -> scan_creator [label="models", style=bold]; + model_creator -> dom_creator [label="model", style=bold]; + dom_creator -> scan_creator [label="models", style=bold]; scan_creator -> calc_creator [label="models", style=bold]; calc_creator -> calculator [label="clusters,\rparameters", style=bold]; calculator -> calc_handler [label="output files", style=bold]; calc_handler -> scan_handler [label="raw data files", style=bold]; - scan_handler -> sym_handler [label="combined scans", style=bold]; - sym_handler -> model_handler [label="combined symmetries", style=bold]; + scan_handler -> dom_handler [label="combined scans", style=bold]; + dom_handler -> model_handler [label="combined domains", style=bold]; mode [shape=parallelogram]; mode -> model_creator [lhead="cluster_model"]; @@ -76,8 +76,8 @@ digraph G { calc_rfac [shape=cds, label="R-factor function"]; calc_rfac -> model_handler [style=dashed]; - comb_syms [shape=cds, label="symmetry combination rule"]; - comb_syms -> sym_handler [style=dashed]; + comb_doms [shape=cds, label="domain combination rule"]; + comb_doms -> dom_handler [style=dashed]; comb_scans [shape=cds, label="scan combination rule"]; comb_scans -> scan_handler [style=dashed]; diff --git a/docs/src/installation.dox b/docs/src/installation.dox index dfb99e4..bf9abd1 100644 --- a/docs/src/installation.dox +++ b/docs/src/installation.dox @@ -9,7 +9,7 @@ the public repository at https://gitlab.psi.ch/pearl/pmsco. For their own developments, users should clone the repository. Changes to common code should be submitted via pull requests. -The program code of PMSCO and its external programs is written in Python, C++ and Fortran. +The program code of PMSCO and its external programs is written in Python 3.6, C++ and Fortran. The code will run in any recent Linux environment on a workstation or in a virtual machine. Scientific Linux, CentOS7, [Ubuntu](https://www.ubuntu.com/) and [Lubuntu](http://lubuntu.net/) (recommended for virtual machine) have been tested. @@ -18,6 +18,7 @@ or cluster with 20-50 available processor cores is recommended. The program requires about 2 GB of RAM per process. The recommended IDE is [PyCharm (community edition)](https://www.jetbrains.com/pycharm). +[Spyder](https://docs.spyder-ide.org/index.html) is a good alternative with a better focus on scientific data. The documentation in [Doxygen](http://www.stack.nl/~dimitri/doxygen/index.html) format is part of the source code. The Doxygen compiler can generate separate documentation in HTML or LaTeX. @@ -38,7 +39,7 @@ The code depends on the following libraries: - SWIG - BLAS - LAPACK -- Python 2.7 or 3.6 +- Python 3.6 - Numpy >= 1.13 - Python packages listed in the requirements.txt file @@ -50,12 +51,6 @@ and it's difficult to switch between different Python versions. On the PSI cluster machines, the environment must be set using the module system and conda (on Ra). Details are explained in the PEARL Wiki. -PMSCO runs under Python 2.7 or Python 3.6. -Since Python 2 is being deprecated, Python 3.6 is recommended. -Compatibility with Python 2.7 is currently maintained by using -the [future package](http://python-future.org/compatible_idioms.html) -but may be dropped at any time. - \subsection sec_install_instructions Instructions @@ -108,7 +103,6 @@ conda install -q --yes -n pmsco \ "numpy>=1.13" \ scipy \ ipython \ - mpi4py \ matplotlib \ nose \ mock \ @@ -116,9 +110,13 @@ conda install -q --yes -n pmsco \ statsmodels \ swig \ gitpython -pip install periodictable attrdict fasteners +pip install periodictable attrdict fasteners mpi4py @endcode +@note `mpi4pi` should be installed via pip, _not_ conda. + conda might install its own MPI libraries, which can cause a conflict with system libraries. + (cf. [mpi4py forum](https://groups.google.com/forum/#!topic/mpi4py/xpPKcOO-H4k)) + \subsubsection sec_install_singularity Installation in Singularity container A [Singularity](https://www.sylabs.io/guides/2.5/user-guide/index.html) container @@ -149,7 +147,7 @@ or build one from a script provided by the PMSCO repository: @code{.sh} cd ~/containers -sudo singularity build pmsco.simg ~/containers/pmsco/extras/singularity/singularity_python2 +sudo singularity build pmsco.simg ~/containers/pmsco/extras/singularity/singularity_python3 @endcode To work with PMSCO, start an interactive shell in the container and switch to the pmsco environment. diff --git a/docs/src/introduction.dox b/docs/src/introduction.dox index b6167e3..cb342be 100644 --- a/docs/src/introduction.dox +++ b/docs/src/introduction.dox @@ -24,7 +24,7 @@ Other programs may be integrated as well. - angle or energy scanned XPD. - various scanning modes including energy, polar angle, azimuthal angle, analyser angle. -- averaging over multiple symmetries (domains or emitters). +- averaging over multiple domains and emitters. - global optimization of multiple scans. - structural optimization algorithms: particle swarm optimization, grid search, gradient search. - calculation of the modulation function. @@ -39,8 +39,8 @@ To set up a new optimization project, you need to: - create a new directory under projects. - create a new Python module in this directory, e.g., my_project.py. - implement a sub-class of project.Project in my_project.py. -- override the create_cluster, create_params, and create_domain methods. -- optionally, override the combine_symmetries and combine_scans methods. +- override the create_cluster, create_params, and create_model_space methods. +- optionally, override the combine_domains and combine_scans methods. - add a global function create_project to my_project.py. - provide experimental data files (intensity or modulation function). diff --git a/docs/src/tasks.dot b/docs/src/tasks.dot index 7239ac6..ebb0b52 100644 --- a/docs/src/tasks.dot +++ b/docs/src/tasks.dot @@ -38,15 +38,15 @@ custom_scan [label="scan\nconfiguration", shape=note]; {rank=same; custom_scan; create_scan; combine_scan;} custom_scan -> create_scan [lhead=cluster_scan]; -subgraph cluster_symmetry { -label="symmetry handler"; +subgraph cluster_domain { +label="domain handler"; rank=same; -create_symmetry [label="define\nsymmetry\ntasks"]; -combine_symmetry [label="gather\nsymmetry\nresults"]; +create_model_space [label="define\ndomain\ntasks"]; +combine_domain [label="gather\ndomain\nresults"]; } -custom_symmetry [label="symmetry\ndefinition", shape=cds]; -{rank=same; create_symmetry; combine_symmetry; custom_symmetry;} -custom_symmetry -> combine_symmetry [lhead=cluster_symmetry]; +custom_domain [label="domain\ndefinition", shape=cds]; +{rank=same; create_model_space; combine_domain; custom_domain;} +custom_domain -> combine_domain [lhead=cluster_domain]; subgraph cluster_emitter { label="emitter handler"; @@ -80,11 +80,11 @@ create_cluster -> edac; create_model -> create_scan [label="level 1 tasks"]; evaluate_model -> combine_scan [label="level 1 results", dir=back]; -create_scan -> create_symmetry [label="level 2 tasks"]; -combine_scan -> combine_symmetry [label="level 2 results", dir=back]; +create_scan -> create_model_space [label="level 2 tasks"]; +combine_scan -> combine_domain [label="level 2 results", dir=back]; -create_symmetry -> create_emitter [label="level 3 tasks"]; -combine_symmetry -> combine_emitter [label="level 3 results", dir=back]; +create_model_space -> create_emitter [label="level 3 tasks"]; +combine_domain -> combine_emitter [label="level 3 results", dir=back]; create_emitter -> create_region [label="level 4 tasks"]; combine_emitter -> combine_region [label="level 4 results", dir=back]; diff --git a/docs/src/uml/CalculationTask-class.puml b/docs/src/uml/CalculationTask-class.puml index 098b161..8a7c389 100644 --- a/docs/src/uml/CalculationTask-class.puml +++ b/docs/src/uml/CalculationTask-class.puml @@ -28,7 +28,7 @@ remove_task_file() class CalcID { model scan -sym +domain emit region } diff --git a/docs/src/uml/CalculationTask-objects.puml b/docs/src/uml/CalculationTask-objects.puml index 8d2268f..794acd9 100644 --- a/docs/src/uml/CalculationTask-objects.puml +++ b/docs/src/uml/CalculationTask-objects.puml @@ -43,15 +43,15 @@ parent = 2, -1, -1, -1, -1 model = {'d': 7} } -Scan11 o.. Sym111 +Scan11 o.. Dom111 -object Sym111 { +object Dom111 { id = 1, 1, 1, -1, -1 parent = 1, 1, -1, -1, -1 model = {'d': 5} } -Sym111 o.. Emitter1111 +Dom111 o.. Emitter1111 object Emitter1111 { id = 1, 1, 1, 1, -1 @@ -90,18 +90,18 @@ scan object ScanHandler -object "Sym: CalculationTask" as Sym { +object "Domain: CalculationTask" as Domain { model scan -symmetry +domain } -object "SymmetryHandler" as SymHandler +object "DomainHandler" as DomainHandler object "Emitter: CalculationTask" as Emitter { model scan -symmetry +domain emitter } @@ -110,7 +110,7 @@ object EmitterHandler object "Region: CalculationTask" as Region { model scan -symmetry +domain emitter region } @@ -120,14 +120,14 @@ object RegionHandler Root "1" o.. "1..*" Model Model "1" o.. "1..*" Scan -Scan "1" o.. "1..*" Sym -Sym "1" o.. "1..*" Emitter +Scan "1" o.. "1..*" Domain +Domain "1" o.. "1..*" Emitter Emitter "1" o.. "1..*" Region (Root, Model) .. ModelHandler (Model, Scan) .. ScanHandler -(Scan, Sym) .. SymHandler -(Sym, Emitter) .. EmitterHandler +(Scan, Domain) .. DomainHandler +(Domain, Emitter) .. EmitterHandler (Emitter, Region) .. RegionHandler @enduml diff --git a/docs/src/uml/calculation-task.puml b/docs/src/uml/calculation-task.puml index 3862cd6..1a55109 100644 --- a/docs/src/uml/calculation-task.puml +++ b/docs/src/uml/calculation-task.puml @@ -4,7 +4,7 @@ class CalculationTask { model scan -symmetry +domain emitter region .. @@ -35,7 +35,7 @@ class Scan { alphas } -class Symmetry { +class Domain { index .. rotation @@ -55,13 +55,13 @@ class Region { CalculationTask *-- Model CalculationTask *-- Scan -CalculationTask *-- Symmetry +CalculationTask *-- Domain CalculationTask *-- Emitter CalculationTask *-- Region class Project { scans - symmetries + domains model_handler cluster_generator } @@ -78,7 +78,7 @@ class ModelHandler { Model ..> ModelHandler Scan ..> Project -Symmetry ..> Project +Domain ..> Project Emitter ..> ClusterGenerator Region ..> Project diff --git a/docs/src/uml/database.puml b/docs/src/uml/database.puml index fed6e6f..5d1cad4 100644 --- a/docs/src/uml/database.puml +++ b/docs/src/uml/database.puml @@ -9,14 +9,6 @@ name code } -class Scan << (T,orchid) >> { -id -.. -job_id -.. -name -} - class Job << (T,orchid) >> { id .. @@ -30,6 +22,22 @@ datetime description } +class Tag << (T,orchid) >> { +id +.. +.. +key +} + +class JobTag << (T,orchid) >> { +id +.. +tag_id +job_id +.. +value +} + class Model << (T,orchid) >> { id .. @@ -46,7 +54,7 @@ id model_id .. scan -sym +domain emit region rfac @@ -69,8 +77,9 @@ value } Project "1" *-- "*" Job +Job "1" *-- "*" JobTag +Tag "1" *-- "*" JobTag Job "1" *-- "*" Model -Job "1" *-- "*" Scan Param "1" *-- "*" ParamValue Model "1" *-- "*" ParamValue Model "1" *-- "*" Result diff --git a/docs/src/uml/handler-activity.puml b/docs/src/uml/handler-activity.puml index 5267352..e3282b8 100644 --- a/docs/src/uml/handler-activity.puml +++ b/docs/src/uml/handler-activity.puml @@ -20,7 +20,7 @@ repeat partition "generate tasks" { :define model tasks; :define scan tasks; -:define symmetry tasks; +:define domain tasks; :define emitter tasks; :define region tasks; } @@ -34,7 +34,7 @@ end fork partition "collect results" { :gather region results; :gather emitter results; -:gather symmetry results; +:gather domain results; :gather scan results; :gather model results; } diff --git a/docs/src/uml/minimum-project-classes.puml b/docs/src/uml/minimum-project-classes.puml index 996ac83..823aff8 100644 --- a/docs/src/uml/minimum-project-classes.puml +++ b/docs/src/uml/minimum-project-classes.puml @@ -5,10 +5,10 @@ package pmsco { mode code scans - symmetries + domains {abstract} create_cluster() {abstract} create_params() - {abstract} create_domain() + {abstract} create_model_space() } } @@ -18,7 +18,7 @@ package projects { __init__() create_cluster() create_params() - create_domain() + create_model_space() } } diff --git a/docs/src/uml/project-classes.puml b/docs/src/uml/project-classes.puml index 127236c..56f6f67 100644 --- a/docs/src/uml/project-classes.puml +++ b/docs/src/uml/project-classes.puml @@ -4,13 +4,13 @@ abstract class Project { mode : str = "single" code : str = "edac" scans : Scan [1..*] - symmetries : dict [1..*] + domains : dict [1..*] cluster_generator : ClusterGenerator handler_classes files : FileTracker {abstract} create_cluster() : Cluster - {abstract} create_params() : Params - {abstract} create_domain() : Domain + {abstract} create_params() : CalculatorParams + {abstract} create_model_space() : ModelSpace } class Scan { @@ -28,7 +28,7 @@ class Scan { import_scan_file() } -class Domain { +class ModelSpace { start : dict min : dict max : dict @@ -37,7 +37,7 @@ class Domain { get_param(name) } -class Params { +class CalculatorParams { title comment cluster_file diff --git a/docs/src/uml/top-activity-partitions.puml b/docs/src/uml/top-activity-partitions.puml index 3edc485..ebecb96 100644 --- a/docs/src/uml/top-activity-partitions.puml +++ b/docs/src/uml/top-activity-partitions.puml @@ -25,7 +25,7 @@ stop |pmsco| start -:define task (model, scan, symmetry, emitter, region); +:define task (model, scan, domain, emitter, region); |project| :create cluster; :create parameters; diff --git a/docs/src/uml/user-project-classes.puml b/docs/src/uml/user-project-classes.puml index 802e3b4..f4a445a 100644 --- a/docs/src/uml/user-project-classes.puml +++ b/docs/src/uml/user-project-classes.puml @@ -5,16 +5,16 @@ package pmsco { mode code scans - symmetries + domains cluster_generator handler_classes __ {abstract} create_cluster() {abstract} create_params() - {abstract} create_domain() + {abstract} create_model_space() .. combine_scans() - combine_symmetries() + combine_domains() combine_emitters() calc_modulation() calc_rfactor() @@ -34,9 +34,9 @@ package projects { setup() .. create_params() - create_domain() + create_model_space() .. - combine_symmetries() + combine_domains() } class UserClusterGenerator { diff --git a/extras/singularity/singularity_python2 b/extras/singularity/singularity_python2 index bb16673..8ea51ab 100644 --- a/extras/singularity/singularity_python2 +++ b/extras/singularity/singularity_python2 @@ -78,7 +78,6 @@ try agent forwarding (-A option to ssh). "numpy>=1.13" \ scipy \ ipython \ - mpi4py \ matplotlib \ nose \ mock \ @@ -86,7 +85,7 @@ try agent forwarding (-A option to ssh). statsmodels \ swig conda clean --all -y - /usr/local/miniconda3/envs/pmsco/bin/pip install periodictable attrdict fasteners + /usr/local/miniconda3/envs/pmsco/bin/pip install periodictable attrdict fasteners mpi4py #%test diff --git a/extras/singularity/singularity_python3 b/extras/singularity/singularity_python3 index 85b0003..02d44db 100644 --- a/extras/singularity/singularity_python3 +++ b/extras/singularity/singularity_python3 @@ -77,7 +77,6 @@ try agent forwarding (-A option to ssh). "numpy>=1.13" \ scipy \ ipython \ - mpi4py \ matplotlib \ nose \ mock \ @@ -85,7 +84,7 @@ try agent forwarding (-A option to ssh). statsmodels \ swig conda clean --all -y - /usr/local/miniconda3/envs/pmsco/bin/pip install periodictable attrdict fasteners + /usr/local/miniconda3/envs/pmsco/bin/pip install periodictable attrdict fasteners mpi4py #%test diff --git a/makefile b/makefile index e25c368..a70e257 100644 --- a/makefile +++ b/makefile @@ -40,9 +40,9 @@ SHELL=/bin/sh PMSCO_DIR = pmsco DOCS_DIR = docs -all: edac loess docs +all: edac loess phagen docs -bin: edac loess +bin: edac loess phagen edac loess msc mufpot phagen: $(MAKE) -C $(PMSCO_DIR) diff --git a/pmsco/calculators/calculator.py b/pmsco/calculators/calculator.py index 0960ef6..247a9b0 100644 --- a/pmsco/calculators/calculator.py +++ b/pmsco/calculators/calculator.py @@ -42,7 +42,7 @@ class Calculator(object): or output_file + '.etpai' depending on scan mode. all other intermediate files are deleted unless keep_temp_files is True. - @param params: a pmsco.project.Params object with all necessary values except cluster and output files set. + @param params: a pmsco.project.CalculatorParams object with all necessary values except cluster and output files set. @param cluster: a pmsco.cluster.Cluster(format=FMT_EDAC) object with all atom positions set. diff --git a/pmsco/calculators/edac.py b/pmsco/calculators/edac.py index 4ee3dab..11cc525 100644 --- a/pmsco/calculators/edac.py +++ b/pmsco/calculators/edac.py @@ -49,7 +49,7 @@ class EdacCalculator(calculator.Calculator): if alpha is defined, theta is implicitly set to normal emission! (to be generalized) - @param params: a pmsco.project.Params object with all necessary values except cluster and output files set. + @param params: a pmsco.project.CalculatorParams object with all necessary values except cluster and output files set. @param scan: a pmsco.project.Scan() object describing the experimental scanning scheme. @@ -173,7 +173,7 @@ class EdacCalculator(calculator.Calculator): f.write(" ".join(format(order, "d") for order in params.orders) + "\n") f.write("emission angle window {0:F}\n".format(params.angular_resolution / 2.0)) - # scattering factor output (see project.Params.phase_output_classes) + # scattering factor output (see project.CalculatorParams.phase_output_classes) if params.phase_output_classes is not None: fn = "{0}.clu".format(params.output_file) f.write("cluster output l(A) {fn}\n".format(fn=fn)) @@ -197,7 +197,7 @@ class EdacCalculator(calculator.Calculator): """ run EDAC with the given parameters and cluster. - @param params: a pmsco.project.Params object with all necessary values except cluster and output files set. + @param params: a pmsco.project.CalculatorParams object with all necessary values except cluster and output files set. @param cluster: a pmsco.cluster.Cluster(format=FMT_EDAC) object with all atom positions set. diff --git a/pmsco/calculators/msc.py b/pmsco/calculators/msc.py index 73d3185..048d7ed 100644 --- a/pmsco/calculators/msc.py +++ b/pmsco/calculators/msc.py @@ -62,7 +62,7 @@ class MscCalculator(calculator.Calculator): """ run the MSC program with the given parameters and cluster. - @param params: a project.Params() object with all necessary values except cluster and output files set. + @param params: a project.CalculatorParams() object with all necessary values except cluster and output files set. @param cluster: a cluster.Cluster(format=FMT_MSC) object with all atom positions set. diff --git a/pmsco/calculators/phagen/makefile b/pmsco/calculators/phagen/makefile index cc86a90..a5c1d7e 100644 --- a/pmsco/calculators/phagen/makefile +++ b/pmsco/calculators/phagen/makefile @@ -28,7 +28,7 @@ PYTHON_EXT_SUFFIX ?= $(shell ${PYTHON_CONFIG} --extension-suffix) all: phagen -phagen: phagen.exe phagen$(EXT_SUFFIX) +phagen: phagen.exe phagen$(PYTHON_EXT_SUFFIX) phagen.exe: phagen_scf.f msxas3.inc msxasc3.inc $(FC) $(FCOPTS) -o phagen.exe phagen_scf.f @@ -36,7 +36,7 @@ phagen.exe: phagen_scf.f msxas3.inc msxasc3.inc phagen.pyf: | phagen_scf.f $(F2PY) -h phagen.pyf -m phagen phagen_scf.f only: libmain -phagen$(EXT_SUFFIX): phagen_scf.f phagen.pyf msxas3.inc msxasc3.inc +phagen$(PYTHON_EXT_SUFFIX): phagen_scf.f phagen.pyf msxas3.inc msxasc3.inc $(F2PY) -c $(F2PYOPTS) -m phagen phagen.pyf phagen_scf.f clean: diff --git a/pmsco/calculators/phagen/runner.py b/pmsco/calculators/phagen/runner.py index 7b90d4d..1111d67 100644 --- a/pmsco/calculators/phagen/runner.py +++ b/pmsco/calculators/phagen/runner.py @@ -61,7 +61,7 @@ class PhagenCalculator(AtomicCalculator): because PHAGEN generates a lot of files with hard-coded names, the function creates a temporary directory for PHAGEN and deletes it before returning. - @param params: pmsco.project.Params object. + @param params: pmsco.project.CalculatorParams object. the phase_files attribute is updated with the paths of the scattering files. @param cluster: pmsco.cluster.Cluster object. @@ -76,6 +76,8 @@ class PhagenCalculator(AtomicCalculator): @return (None, dict) where dict is a list of output files with their category. the category is "atomic" for all output files. """ + assert cluster.get_emitter_count() == 1, "PHAGEN cannot handle more than one emitter at a time" + transl = Translator() transl.params.set_params(params) transl.params.set_cluster(cluster) @@ -132,6 +134,14 @@ class PhagenCalculator(AtomicCalculator): except IOError: logger.error("error loading phagen cluster file {fi}".format(fi=clufile)) + try: + listfile = outfile + ".list" + report_listfile = os.path.join(prev_wd, output_file + ".phagen.list") + shutil.copy(listfile, report_listfile) + files[report_listfile] = "log" + except IOError: + logger.error("error loading phagen list file {fi}".format(fi=listfile)) + finally: os.chdir(prev_wd) diff --git a/pmsco/calculators/phagen/translator.py b/pmsco/calculators/phagen/translator.py index afe93f4..dfee383 100644 --- a/pmsco/calculators/phagen/translator.py +++ b/pmsco/calculators/phagen/translator.py @@ -19,6 +19,7 @@ from __future__ import print_function import numpy as np +from pmsco.cluster import Cluster from pmsco.compat import open ## rydberg energy in electron volts @@ -72,7 +73,7 @@ class TranslationParams(object): """ set the translation parameters. - @param params: a pmsco.project.Params object or + @param params: a pmsco.project.CalculatorParams object or a dictionary containing some or all public fields of this class. @return: None """ @@ -125,6 +126,44 @@ class Translator(object): 6. call write_edac_scattering to produce the EDAC scattering matrix files. 7. call write_edac_emission to produce the EDAC emission matrix file. """ + + ## @var params + # + # project parameters needed for translation. + # + # fill the attributes of this object before using any translator methods. + + ## @var scattering + # + # t-matrix storage + # + # the t-matrix is stored in a flat, one-dimensional numpy structured array consisting of the following fields: + # @arg e (float) energy (eV) + # @arg a (int) atom index (1-based) + # @arg l (int) angular momentum quantum number l + # @arg t (complex) scattering matrix element, t = exp(-i * delta) * sin delta + # + # @note PHAGEN uses the convention t = exp(-i * delta) * sin delta, + # whereas EDAC uses t = exp(i * delta) * sin delta (complex conjugate). + # this object stores the t-matrix according to the PHAGEN convention. + # the conversion to the EDAC convention occurs in write_edac_scattering_file(). + + ## @var emission + # + # radial matrix element storage + # + # the radial matrix elemnts are stored in a flat, one-dimensional numpy structured array + # consisting of the following fields: + # @arg e (float) energy (eV) + # @arg dw (complex) matrix element for the transition to l-1 + # @arg up (complex) matrix element for the transition to l+1 + + ## @var cluster + # + # cluster object for PHAGEN + # + # this object is created by translate_cluster(). + def __init__(self): """ initialize the object instance. @@ -134,18 +173,33 @@ class Translator(object): self.scattering = np.empty(0, dtype=dt) dt = [('e', 'f4'), ('dw', 'c16'), ('up', 'c16')] self.emission = np.empty(0, dtype=dt) + self.cluster = None + + def translate_cluster(self): + """ + translate the cluster into a form suitable for PHAGEN. + + specifically, move the (first and hopefully only) emitter to the first atom position. + + the method copies the cluster from self.params into a new object + and stores it under self.cluster. + + @return: None + """ + self.cluster = Cluster() + self.cluster.copy_from(self.params.cluster) + ems = self.cluster.get_emitters(['i']) + self.cluster.move_to_first(idx=ems[0][0]-1) def write_cluster(self, f): """ write the cluster section of the PHAGEN input file. - requires a valid pmsco.cluster.Cluster in self.params.cluster. - @param f: file or output stream (an object with a write method) @return: None """ - for atom in self.params.cluster.data: + for atom in self.cluster.data: d = {k: atom[k] for k in atom.dtype.names} f.write("{s} {t} {x} {y} {z}\n".format(**d)) f.write("-1 -1 0. 0. 0.\n") @@ -163,7 +217,7 @@ class Translator(object): @return: None """ - data = self.params.cluster.data + data = self.cluster.data elements = np.unique(data['t']) for element in elements: idx = np.where(data['t'] == element) @@ -181,29 +235,34 @@ class Translator(object): @return: None """ phagen_params = {} + + self.translate_cluster() + phagen_params['absorber'] = 1 phagen_params['emin'] = self.params.kinetic_energies.min() / ERYDBERG phagen_params['emax'] = self.params.kinetic_energies.max() / ERYDBERG - phagen_params['delta'] = (phagen_params['emax'] - phagen_params['emin']) / \ - (self.params.kinetic_energies.shape[0] - 1) - if phagen_params['delta'] < 0.0001: + if self.params.kinetic_energies.shape[0] > 1: + phagen_params['delta'] = (phagen_params['emax'] - phagen_params['emin']) / \ + (self.params.kinetic_energies.shape[0] - 1) + else: phagen_params['delta'] = 0.1 - phagen_params['edge'] = state_to_edge(self.params.initial_state) # possibly not used + phagen_params['edge'] = state_to_edge(self.params.initial_state) phagen_params['edge1'] = 'm4' # auger not supported phagen_params['edge2'] = 'm4' # auger not supported phagen_params['cip'] = self.params.binding_energy / ERYDBERG if phagen_params['cip'] < 0.001: raise ValueError("binding energy parameter is zero.") - if np.sum(np.abs(self.params.cluster.data['q']) >= 0.001) > 0: + if np.sum(np.abs(self.cluster.data['q'])) > 0.: phagen_params['ionzst'] = 'ionic' else: phagen_params['ionzst'] = 'neutral' - if hasattr(f, "write"): + if hasattr(f, "write") and callable(f.write): f.write("&job\n") f.write("calctype='xpd',\n") f.write("coor='angs',\n") f.write("cip={cip},\n".format(**phagen_params)) + f.write("absorber={absorber},\n".format(**phagen_params)) f.write("edge='{edge}',\n".format(**phagen_params)) f.write("edge1='{edge1}',\n".format(**phagen_params)) f.write("edge2='{edge1}',\n".format(**phagen_params)) @@ -254,13 +313,18 @@ class Translator(object): @arg l angular momentum quantum number l @arg t complex scattering matrix element + @note PHAGEN uses the convention t = exp(-i * delta) * sin delta, + whereas EDAC uses t = exp(i * delta) * sin delta (complex conjugate). + this class stores the t-matrix according to the PHAGEN convention. + the conversion to the EDAC convention occurs in write_edac_scattering_file(). + @param f: file or path (any file-like or path-like object that can be passed to numpy.genfromtxt). @return: None """ dt = [('e', 'f4'), ('x1', 'f4'), ('x2', 'f4'), ('na', 'i4'), ('nl', 'i4'), ('tr', 'f8'), ('ti', 'f8'), ('ph', 'f4')] - data = np.genfromtxt(f, dtype=dt) + data = np.atleast_1d(np.genfromtxt(f, dtype=dt)) self.scattering = np.resize(self.scattering, data.shape) scat = self.scattering @@ -308,7 +372,7 @@ class Translator(object): @return: None """ - if hasattr(f, "write"): + if hasattr(f, "write") and callable(f.write): energies = np.unique(scat['e']) ne = energies.shape[0] lmax = scat['l'].max() @@ -323,7 +387,7 @@ class Translator(object): if ne > 1: f.write("{0:.3f} ".format(energy)) for item in energy_scat: - f.write(" {0:.6f} {1:.6f}".format(item['t'].real, item['t'].imag)) + f.write(" {0:.6f} {1:.6f}".format(item['t'].real, -item['t'].imag)) for i in range(len(energy_scat), lmax + 1): f.write(" 0 0") f.write("\n") @@ -341,7 +405,7 @@ class Translator(object): @return: None """ - if hasattr(f, "write"): + if hasattr(f, "write") and callable(f.write): energies = np.unique(scat['e']) ne = energies.shape[0] lmax = scat['l'].max() @@ -356,7 +420,8 @@ class Translator(object): if ne > 1: f.write("{0:.3f} ".format(energy)) for item in energy_scat: - f.write(" {0:.6f}".format(np.angle(item['t']))) + pha = np.sign(item['t'].real) * np.arcsin(np.sqrt(np.abs(item['t'].imag))) + f.write(" {0:.6f}".format(pha)) for i in range(len(energy_scat), lmax + 1): f.write(" 0") f.write("\n") @@ -373,7 +438,7 @@ class Translator(object): @return: None """ dt = [('ar', 'f8'), ('ai', 'f8'), ('br', 'f8'), ('bi', 'f8')] - data = np.genfromtxt(f, dtype=dt) + data = np.atleast_1d(np.genfromtxt(f, dtype=dt)) self.emission = np.resize(self.emission, data.shape) emission = self.emission @@ -390,7 +455,7 @@ class Translator(object): @return: None """ - if hasattr(f, "write"): + if hasattr(f, "write") and callable(f.write): l0 = self.params.l_init energies = self.params.kinetic_energies emission = self.emission diff --git a/pmsco/cluster.py b/pmsco/cluster.py index 9b2a6df..e0c0301 100755 --- a/pmsco/cluster.py +++ b/pmsco/cluster.py @@ -17,7 +17,7 @@ pip install --user periodictable @author Matthias Muntwiler -@copyright (c) 2015-19 by Paul Scherrer Institut @n +@copyright (c) 2015-20 by Paul Scherrer Institut @n Licensed under the Apache License, Version 2.0 (the "License"); @n you may not use this file except in compliance with the License. You may obtain a copy of the License at @@ -54,14 +54,14 @@ if sys.version_info[0] >= 3: else: _SYMBOL_TYPE = 'S2' -## numpy.array datatype of Cluster.data array -DTYPE_CLUSTER_INTERNAL = [('i', 'i4'), ('t', 'i4'), ('s', _SYMBOL_TYPE), ('x', 'f4'), ('y', 'f4'), ('z', 'f4'), - ('e', 'u1'), ('q', 'f4'), ('c', 'i4')] -## file format of internal Cluster.data array +## numpy.array datatype of internal Cluster.data array +DTYPE_CLUSTER_INTERNAL = [('i', 'i4'), ('t', 'i4'), ('s', _SYMBOL_TYPE), ('c', 'i4'), + ('x', 'f4'), ('y', 'f4'), ('z', 'f4'), ('e', 'u1'), ('q', 'f4')] +## string formatting of native file format FMT_CLUSTER_INTERNAL = ["%5u", "%2u", "%s", "%5u", "%7.3f", "%7.3f", "%7.3f", "%1u", "%7.3f"] -## field (column) names of internal Cluster.data array +## field (column) names of native file format FIELDS_CLUSTER_INTERNAL = ['i', 't', 's', 'c', 'x', 'y', 'z', 'e', 'q'] -## column names for export +## column names of native file format NAMES_CLUSTER_INTERNAL = {'i': 'index', 't': 'element', 's': 'symbol', 'c': 'class', 'x': 'x', 'y': 'y', 'z': 'z', 'e': 'emitter', 'q': 'charge'} @@ -178,12 +178,12 @@ class Cluster(object): # @arg @c 'i' (int) atom index (1-based) # @arg @c 't' (int) atom type (chemical element number) # @arg @c 's' (string) chemical element symbol + # @arg @c 'c' (int) scatterer class # @arg @c 'x' (float32) x coordinate of the atom position # @arg @c 'y' (float32) t coordinate of the atom position # @arg @c 'z' (float32) z coordinate of the atom position # @arg @c 'e' (uint8) 1 = emitter, 0 = regular atom # @arg @c 'q' (float32) charge/ionicity - # @arg @c 'c' (int) scatterer class ## @var comment (str) # one-line comment that can be included in some cluster files @@ -227,7 +227,7 @@ class Cluster(object): """ self.rmax = r - def build_element(self, index, element_number, x, y, z, emitter, charge=0., scatterer=0): + def build_element(self, index, element_number, x, y, z, emitter, charge=0., scatterer_class=0): """ build a tuple in the format of the internal data array. @@ -241,10 +241,10 @@ class Cluster(object): @param charge: (float) ionicity. default = 0 - @param scatterer: (int) scatterer class. default = 0. + @param scatterer_class: (int) scatterer class. default = 0. """ symbol = pt.elements[element_number].symbol - element = (index, element_number, symbol, x, y, z, int(emitter), charge, scatterer) + element = (index, element_number, symbol, scatterer_class, x, y, z, int(emitter), charge) return element def add_atom(self, atomtype, v_pos, is_emitter=False, charge=0.): @@ -284,17 +284,18 @@ class Cluster(object): """ r_great = max(self.rmax, np.linalg.norm(v_pos)) n0 = self.data.shape[0] + 1 - n1 = max(int(r_great / np.linalg.norm(v_lat1)) + 1, 3) * 2 - n2 = max(int(r_great / np.linalg.norm(v_lat2)) + 1, 3) * 2 - nn = 0 - buf = np.empty((2 * n1 + 1) * (2 * n2 + 1), dtype=self.dtype) - for i1 in range(-n1, n1 + 1): - for i2 in range(-n2, n2 + 1): - v = v_pos + v_lat1 * i1 + v_lat2 * i2 - if np.linalg.norm(v) <= self.rmax: - buf[nn] = self.build_element(nn + n0, atomtype, v[0], v[1], v[2], 0) - nn += 1 - buf = np.resize(buf, nn) + n1 = max(int(r_great / np.linalg.norm(v_lat1)) + 1, 4) * 3 + n2 = max(int(r_great / np.linalg.norm(v_lat2)) + 1, 4) * 3 + idx = np.mgrid[-n1:n1+1, -n2:n2+1] + idx = idx.reshape(idx.shape[0], -1) + lat = np.array([v_lat1, v_lat2]) + v = v_pos + np.matmul(idx.T, lat) + rsq = np.sum(np.square(v), axis=-1) + b1 = rsq <= self.rmax**2 + sel = b1.nonzero()[0] + buf = np.empty((len(sel)), dtype=self.dtype) + for nn, ii in enumerate(sel): + buf[nn] = self.build_element(nn + n0, atomtype, v[ii, 0], v[ii, 1], v[ii, 2], 0) self.data = np.append(self.data, buf) def add_bulk(self, atomtype, v_pos, v_lat1, v_lat2, v_lat3, z_surf=0.0): @@ -322,16 +323,18 @@ class Cluster(object): n1 = max(int(r_great / np.linalg.norm(v_lat1)) + 1, 4) * 3 n2 = max(int(r_great / np.linalg.norm(v_lat2)) + 1, 4) * 3 n3 = max(int(r_great / np.linalg.norm(v_lat3)) + 1, 4) * 3 - nn = 0 - buf = np.empty((2 * n1 + 1) * (2 * n2 + 1) * (n3 + 1), dtype=self.dtype) - for i1 in range(-n1, n1 + 1): - for i2 in range(-n2, n2 + 1): - for i3 in range(-n3, n3 + 1): - v = v_pos + v_lat1 * i1 + v_lat2 * i2 + v_lat3 * i3 - if np.linalg.norm(v) <= self.rmax and v[2] <= z_surf: - buf[nn] = self.build_element(nn + n0, atomtype, v[0], v[1], v[2], 0) - nn += 1 - buf = np.resize(buf, nn) + idx = np.mgrid[-n1:n1+1, -n2:n2+1, -n3:n3+1] + idx = idx.reshape(idx.shape[0], -1) + lat = np.array([v_lat1, v_lat2, v_lat3]) + v = v_pos + np.matmul(idx.T, lat) + rsq = np.sum(np.square(v), axis=-1) + b1 = rsq <= self.rmax**2 + b2 = v[:, 2] <= z_surf + ba = np.all([b1, b2], axis=0) + sel = ba.nonzero()[0] + buf = np.empty((len(sel)), dtype=self.dtype) + for nn, ii in enumerate(sel): + buf[nn] = self.build_element(nn + n0, atomtype, v[ii, 0], v[ii, 1], v[ii, 2], 0) self.data = np.append(self.data, buf) def add_cluster(self, cluster, check_rmax=False, check_unique=False, tol=0.001): @@ -426,15 +429,47 @@ class Cluster(object): idx = np.where(b_all) self.data['z'][idx] += z_shift - return idx + return idx[0] - def translate(self, vector, element=0): + def get_center(self, element=None): + """ + get the geometric center of the cluster or a class of atoms. + + @param element: chemical element number (int) or symbol (str) + if atoms of a specific element should be considered only. + by default (element == None or 0 or ""), + all atoms are included in the calculation. + + @return: (numpy.ndarray) 3-dimensional vector. + """ + + if element: + try: + sel = self.data['t'] == int(element) + except ValueError: + sel = self.data['s'] == element + else: + sel = np.ones_like(self.data['t']) + idx = np.where(sel) + center = np.zeros(3) + center[0] = np.mean(self.data['x'][idx]) + center[1] = np.mean(self.data['y'][idx]) + center[2] = np.mean(self.data['z'][idx]) + return center + + def translate(self, vector, element=None): """ translate the cluster or all atoms of a specified element. + translation shifts each selected atom by the given vector. + @param vector: (numpy.ndarray) 3-dimensional displacement vector. - @param element: (int) chemical element number if atoms of a specific element should be affected. - by default (element = 0), all atoms are moved. + + @param element: chemical element number (int) or symbol (str) + if atoms of a specific element should be affected only. + by default (element == None or 0 or ""), + all atoms are translated. + @return: (numpy.ndarray) indices of the atoms that have been shifted. """ if element: @@ -449,7 +484,7 @@ class Cluster(object): self.data['y'][idx] += vector[1] self.data['z'][idx] += vector[2] - return idx + return idx[0] def matrix_transform(self, matrix): """ @@ -461,47 +496,49 @@ class Cluster(object): @return: None """ - for atom in self.data: - v = np.matrix([atom['x'], atom['y'], atom['z']]) - w = matrix * v.transpose() - atom['x'] = float(w[0]) - atom['y'] = float(w[1]) - atom['z'] = float(w[2]) + pos = np.empty((3, self.data.shape[0]), np.float32) + pos[0, :] = self.data['x'] + pos[1, :] = self.data['y'] + pos[2, :] = self.data['z'] + pos = np.matmul(matrix, pos) + self.data['x'] = pos[0, :] + self.data['y'] = pos[1, :] + self.data['z'] = pos[2, :] def rotate_x(self, angle): """ - rotate cluster about the surface normal axis + rotate cluster about the x-axis @param angle (float) in degrees """ angle = math.radians(angle) s = math.sin(angle) c = math.cos(angle) - matrix = np.matrix([[1, 0, 0], [0, c, -s], [0, s, c]]) + matrix = np.array([[1, 0, 0], [0, c, -s], [0, s, c]]) self.matrix_transform(matrix) def rotate_y(self, angle): """ - rotate cluster about the surface normal axis + rotate cluster about the y-axis @param angle (float) in degrees """ angle = math.radians(angle) s = math.sin(angle) c = math.cos(angle) - matrix = np.matrix([[c, 0, s], [0, 1, 0], [-s, 0, c]]) + matrix = np.array([[c, 0, s], [0, 1, 0], [-s, 0, c]]) self.matrix_transform(matrix) def rotate_z(self, angle): """ - rotate cluster about the surface normal axis + rotate cluster about the z-axis (surface normal) @param angle (float) in degrees """ angle = math.radians(angle) s = math.sin(angle) c = math.cos(angle) - matrix = np.matrix([[c, -s, 0], [s, c, 0], [0, 0, 1]]) + matrix = np.array([[c, -s, 0], [s, c, 0], [0, 0, 1]]) self.matrix_transform(matrix) def find_positions(self, pos, tol=0.001): @@ -794,6 +831,53 @@ class Cluster(object): idx = self.data['e'] != 0 return np.sum(idx) + def calc_scattering_angles(self, index_emitter, radius): + """ + calculate forward-scattering angles of the cluster atoms + + for each atom within a given radius of the emitter, + the connecting vector between emitter and scatterer is calculated + and returned in cartesian and polar coordinates. + + @param index_emitter: atom index of the emitter. + all angles are calculated with respect to this atom. + + @param radius: include only atoms within this radius of the emitter. + + @note back-scattering angles can be obtained by inverting the angle on the unit sphere: + th' = 180 - th, ph' = -ph. + + @return dictionary with results. + each item is a numpy.ndarray of shape (N, M) + where N is the number of scatterers + and M = 3 for dict['xyz'] and M = 1 otherwise. + @arg dict['index']: atom index into the cluster array. + @arg dict['xyz']: connecting vector between the emitter and the atom in cartesian coordinates. + @arg dict['dist']: distance between the emitter and the atom. + @arg dict['polar']: polar angle with respect to the z-axis. + @arg dict['azimuth']: azimuthal angle with respect to the x-axis. + """ + # position of emitter atom + em = self.data[index_emitter] + em = np.asarray((em['x'], em['y'], em['z'])) + + # relative positions of scattering atoms + xyz = self.get_positions() + xyz -= em + dist = np.linalg.norm(xyz, axis=1) + sel1 = dist <= radius + sel2 = dist > 0. + idx = np.where(np.all([sel1, sel2], axis=0)) + xyz = xyz[idx] + dist = dist[idx] + + # angles + v1 = np.asarray([0, 0, 1]) + v2 = np.transpose(xyz / dist.reshape((dist.shape[0], 1))) + th = np.degrees(np.arccos(np.clip(np.dot(v1, v2), -1., 1.))) + ph = np.degrees(np.arctan2(v2[1], v2[0])) + return {'index': idx[0], 'xyz': xyz, 'dist': dist, 'polar': th, 'azimuth': ph} + def load_from_file(self, f, fmt=FMT_DEFAULT): """ load a cluster from a file created by the scattering program. @@ -848,7 +932,7 @@ class Cluster(object): else: raise ValueError("unknown file format {}".format(fmt)) - data = np.genfromtxt(f, dtype=dtype, skip_header=sh) + data = np.atleast_1d(np.genfromtxt(f, dtype=dtype, skip_header=sh)) if fmt == FMT_PHAGEN_IN and data['t'][-1] < 1: data = data[:-1] @@ -920,7 +1004,7 @@ class Cluster(object): or left at the default value 0 in which case PMSCO sets the correct values. if the scattering factors are loaded from existing files, - the atom class corresponds to the key of the pmsco.project.Params.phase_files dictionary. + the atom class corresponds to the key of the pmsco.project.CalculatorParams.phase_files dictionary. in this case the meaning of the class value is up to the project, and the class must be set either by the cluster generator or the project's after_atomic_scattering hook. @@ -956,7 +1040,7 @@ class Cluster(object): the other cluster must contain the same atoms (same coordinates) in a possibly random order. the atoms of this and the other cluster are matched up by sorting them by coordinate. - atomic scattering calculators often change the order of atoms in a cluster based on symmetry, + atomic scattering calculators often change the order of atoms in a cluster based on domain, and return atom classes versus atomic coordinates. this method allows to import the atom classes into the original cluster. @@ -1071,9 +1155,9 @@ class ClusterGenerator(object): def count_emitters(self, model, index): """ - return the number of emitter configurations for a particular model, scan and symmetry. + return the number of emitter configurations for a particular model, scan and domain. - the number of emitter configurations may depend on the model parameters, scan index and symmetry index. + the number of emitter configurations may depend on the model parameters, scan index and domain index. by default, the method returns 1, which means that there is only one emitter configuration. emitter configurations are mainly a way to distribute the calculations to multiple processes @@ -1100,9 +1184,9 @@ class ClusterGenerator(object): @param index (named tuple CalcID) calculation index. the method should consider only the following attributes: - @arg @c scan scan index (index into Project.scans) - @arg @c sym symmetry index (index into Project.symmetries) - @arg @c emit emitter index must be -1. + @arg scan scan index (index into Project.scans) + @arg domain domain index (index into Project.domains) + @arg emit emitter index must be -1. @return number of emitter configurations. this implementation returns the default value of 1. @@ -1114,23 +1198,23 @@ class ClusterGenerator(object): create a Cluster object given the model parameters and calculation index. the generated cluster will typically depend on the model parameters. - depending on the project, it may also depend on the scan index, symmetry index and emitter index. + depending on the project, it may also depend on the scan index, domain index and emitter index. the scan index can be used to generate a different cluster for different scan geometry, e.g., if some atoms can be excluded due to a longer mean free path. if this is not the case for the specific project, the scan index can be ignored. - the symmetry index may select a particular domain that has a different atomic arrangement. - in this case, depending on the value of index.sym, the function must generate a cluster corresponding - to the particular domain/symmetry. - the method can ignore the symmetry index if the project defines only one symmetry, - or if the symmetry does not correspond to a different atomic structure. + the domain index may select a particular domain that has a different atomic arrangement. + in this case, depending on the value of index.domain, the function must generate a cluster corresponding + to the particular domain. + the method can ignore the domain index if the project defines only one domain, + or if the domain does not correspond to a different atomic structure. the emitter index selects a particular emitter configuration. depending on the value of the emitter index, the method must react differently: 1. if the value is -1, return the full cluster and mark all inequivalent emitter atoms. - emitters which are reproduced by a symmetry expansion in combine_emitters() should not be marked. + emitters which are reproduced by a domain expansion in combine_emitters() should not be marked. the full diffraction scan will be calculated in one calculation. 2. if the value is greater or equal to zero, generate the cluster with the emitter configuration @@ -1152,9 +1236,9 @@ class ClusterGenerator(object): @param index (named tuple CalcID) calculation index. the method should consider only the following attributes: - @arg @c scan scan index (index into Project.scans) - @arg @c sym symmetry index (index into Project.symmetries) - @arg @c emit emitter index. + @arg scan scan index (index into Project.scans) + @arg domain domain index (index into Project.domains) + @arg emit emitter index. if -1, generate the full cluster and mark all emitters. if greater or equal to zero, the value is a zero-based index of the emitter configuration. diff --git a/pmsco/data.py b/pmsco/data.py index d01831e..f3e3efa 100644 --- a/pmsco/data.py +++ b/pmsco/data.py @@ -117,7 +117,7 @@ def load_plt(filename, int_column=-1): data[i]['p'] = phi data[i]['i'] = selected intensity column """ - data = np.genfromtxt(filename, usecols=(0, 2, 3, int_column), dtype=DTYPE_ETPI) + data = np.atleast_1d(np.genfromtxt(filename, usecols=(0, 2, 3, int_column), dtype=DTYPE_ETPI)) sort_data(data) return data @@ -189,7 +189,7 @@ def load_edac_pd(filename, int_column=-1, energy=0.0, theta=0.0, phi=0.0, fixed_ logger.warning("unexpected EDAC output file column name") break cols = tuple(cols) - raw = np.genfromtxt(filename, usecols=cols, dtype=dtype, skip_header=2) + raw = np.atleast_1d(np.genfromtxt(filename, usecols=cols, dtype=dtype, skip_header=2)) if fixed_cluster: etpi = np.empty(raw.shape, dtype=DTYPE_ETPAI) diff --git a/pmsco/database.py b/pmsco/database.py index 01272c8..9cae8e9 100644 --- a/pmsco/database.py +++ b/pmsco/database.py @@ -29,6 +29,7 @@ import sqlite3 import fasteners import numpy as np import pmsco.dispatch as dispatch +from pmsco.helpers import BraceMessage as BMsg logger = logging.getLogger(__name__) @@ -60,7 +61,7 @@ DB_SPECIAL_PARAMS = {"job_id": "_db_job", "result_id": "_db_result", "model": "_model", "scan": "_scan", - "sym": "_sym", + "domain": "_domain", "emit": "_emit", "region": "_region", "gen": "_gen", @@ -77,7 +78,7 @@ DB_SPECIAL_NUMPY_TYPES = {"job_id": "i8", "result_id": "i8", "model": "i8", "scan": "i8", - "sym": "i8", + "domain": "i8", "emit": "i8", "region": "i8", "gen": "i8", @@ -259,7 +260,7 @@ class ResultsDatabase(object): sql_select_model = """select id, job_id, model, gen, particle from Models where id=:id""" sql_select_model_model = """select id, job_id, model, gen, particle - from Models where model=:model""" + from Models where job_id=:job_id and model=:model""" sql_select_model_job = """select id, job_id, model, gen, particle from Models where job_id=:job_id""" sql_delete_model = """delete from Models where model_id = :model_id""" @@ -268,7 +269,7 @@ class ResultsDatabase(object): `id` INTEGER PRIMARY KEY, `model_id` INTEGER, `scan` integer, - `sym` integer, + `domain` integer, `emit` integer, `region` integer, `rfac` REAL, @@ -276,22 +277,29 @@ class ResultsDatabase(object): )""" sql_index_results_tasks = """create index if not exists `index_results_tasks` ON `Results` - (`model_id`, `scan`,`sym`,`emit`,`region`)""" + (`model_id`, `scan`,`domain`,`emit`,`region`)""" sql_drop_index_results_tasks = "drop index if exists index_results_tasks" sql_index_results_models = """create index if not exists `index_results_models` ON `Results` (`id`, `model_id`)""" sql_drop_index_results_models = "drop index if exists index_results_models" - sql_insert_result = """insert into Results(model_id, scan, sym, emit, region, rfac) - values (:model_id, :scan, :sym, :emit, :region, :rfac)""" + sql_insert_result = """insert into Results(model_id, scan, domain, emit, region, rfac) + values (:model_id, :scan, :domain, :emit, :region, :rfac)""" sql_update_result = """update Results set rfac=:rfac where id=:result_id""" - sql_select_result = """select id, model_id, scan, sym, emit, region, rfac + sql_select_result = """select id, model_id, scan, domain, emit, region, rfac from Results where id=:id""" - sql_select_result_index = """select id, model_id, scan, sym, emit, region, rfac - from Results where model_id=:model_id and scan=:scan and sym=:sym and emit=:emit and region=:region""" + sql_select_result_index = """select id, model_id, scan, domain, emit, region, rfac + from Results where model_id=:model_id and scan=:scan and domain=:domain and emit=:emit and region=:region""" sql_delete_result = """delete from Results where id = :result_id""" + sql_view_results_models = """create view if not exists `ViewResultsModels` as + select project_id, job_id, model_id, Results.id as result_id, rfac, model, scan, domain, emit, region + from Models + join Results on Results.model_id = Models.id + join Jobs on Jobs.id = Models.job_id + order by project_id, job_id, rfac, model, scan, domain, emit, region + """ sql_create_params = """CREATE TABLE IF NOT EXISTS `Params` ( `id` INTEGER PRIMARY KEY, @@ -422,16 +430,6 @@ class ResultsDatabase(object): # @var _lock_filename (str). # path and name of the lock file or an empty string if no locking is used. - # @var _lock (obj). - # context manager which provides a locking mechanism for the database. - # - # this is either a fasteners.InterprocessLock or _DummyLock. - # InterprocessLock allows to serialize access to the database by means of a lock file. - # _DummyLock is used with an in-memory database which does not require locking. - # - # @note InterprocessLock is re-usable but not re-entrant. - # Be careful not to nest contexts when calling other methods from within this class! - def __init__(self): self._conn = None self._db_filename = "" @@ -440,7 +438,6 @@ class ResultsDatabase(object): self._model_params = {} self._tags = {} self._lock_filename = "" - self._lock = None def connect(self, db_filename, lock_filename=""): """ @@ -469,14 +466,10 @@ class ResultsDatabase(object): self._lock_filename = "" else: self._lock_filename = db_filename + ".lock" - if self._lock_filename: - self._lock = fasteners.InterProcessLock(self._lock_filename) - else: - self._lock = _DummyLock() self._conn = sqlite3.connect(self._db_filename) self._conn.row_factory = sqlite3.Row - with self._lock: + with self.lock(): self._conn.execute("PRAGMA foreign_keys = 1") self._conn.commit() c = self._conn.execute("SELECT name FROM sqlite_master WHERE type='table' AND name='Models'") @@ -496,7 +489,6 @@ class ResultsDatabase(object): if self._conn is not None: self._conn.close() self._conn = None - self._lock = None def check_connection(self): """ @@ -511,9 +503,25 @@ class ResultsDatabase(object): @raise AssertionError if the connection is not valid. """ - assert self._lock is not None, "database not connected" assert self._conn is not None, "database not connected" + def lock(self): + """ + create a file-lock context manager for the database. + + this is either a fasteners.InterProcessLock object on self._lock_filename + or a _DummyLock object if the database is in memory. + InterprocessLock allows to serialize access to the database by means of a lock file. + this is necessary if multiple pmsco instances require access to the same database. + _DummyLock is used with an in-memory database which does not require locking. + + the lock object can be used as context-manager in a with statement. + """ + if self._lock_filename: + return fasteners.InterProcessLock(self._lock_filename) + else: + return _DummyLock() + def create_schema(self): """ create the database schema (tables and indices). @@ -525,7 +533,7 @@ class ResultsDatabase(object): @return: None """ self.check_connection() - with self._lock, self._conn: + with self.lock(), self._conn: self._conn.execute(self.sql_create_projects) self._conn.execute(self.sql_create_jobs) self._conn.execute(self.sql_create_models) @@ -539,19 +547,23 @@ class ResultsDatabase(object): self._conn.execute(self.sql_index_paramvalues) self._conn.execute(self.sql_index_jobtags) self._conn.execute(self.sql_index_models) + self._conn.execute(self.sql_view_results_models) def register_project(self, name, code): """ register a project with the database. @param name: name of the project. alphanumeric characters only. no spaces or special characters! + if a project of the same name exists in the database, + the id of the existing entry is returned. + the existing entry is not modified. @param code: name of the pmsco module that defines the project. @return: id value of the project in the database. """ self.check_connection() - with self._lock, self._conn: + with self.lock(), self._conn: c = self._conn.execute(self.sql_select_project_name, {'name': name}) v = c.fetchone() if v: @@ -574,7 +586,7 @@ class ResultsDatabase(object): @return None """ self.check_connection() - with self._lock, self._conn: + with self.lock(), self._conn: param_dict = {'project_id': project_id} self._conn.execute(self.sql_delete_project, param_dict) @@ -585,7 +597,11 @@ class ResultsDatabase(object): @param project_id: identifier of the project. see register_project(). - @param name: name of the job. up to the user, must be unique within a project. + @param name: name of the job. alphanumeric characters only. no spaces or special characters! + must be unique within a project. + if a job of the same name and same project exists in the database, + the id of the existing entry is returned. + the existing entry is not modified. @param mode: optimization mode string (should be same as command line argument). @@ -600,7 +616,7 @@ class ResultsDatabase(object): @return: id value of the job in the database. """ self.check_connection() - with self._lock, self._conn: + with self.lock(), self._conn: c = self._conn.execute(self.sql_select_job_name, {'project_id': project_id, 'name': name}) v = c.fetchone() if v: @@ -630,7 +646,7 @@ class ResultsDatabase(object): @return None """ self.check_connection() - with self._lock, self._conn: + with self.lock(), self._conn: param_dict = {'job_id': job_id} self._conn.execute(self.sql_delete_job, param_dict) @@ -669,7 +685,7 @@ class ResultsDatabase(object): @return: id value of the job in the database """ self.check_connection() - with self._lock, self._conn: + with self.lock(), self._conn: job_id = self._query_job_name(job_name, project_id=project_id) return job_id @@ -686,7 +702,7 @@ class ResultsDatabase(object): @return: id value of the parameter in the database. """ self.check_connection() - with self._lock, self._conn: + with self.lock(), self._conn: return self._register_param(key) def _register_param(self, key): @@ -721,7 +737,7 @@ class ResultsDatabase(object): @return: None """ self.check_connection() - with self._lock, self._conn: + with self.lock(), self._conn: for key in model_params: if key[0] != '_': self._register_param(key) @@ -762,7 +778,7 @@ class ResultsDatabase(object): params = {} self.check_connection() - with self._lock, self._conn: + with self.lock(), self._conn: c = self._conn.execute(sql, args) for row in c: params[row['key']] = row['param_id'] @@ -790,7 +806,7 @@ class ResultsDatabase(object): @return: id value of the tag in the database. """ self.check_connection() - with self._lock, self._conn: + with self.lock(), self._conn: return self._register_tag(key) def _register_tag(self, key): @@ -825,7 +841,7 @@ class ResultsDatabase(object): @return: None """ self.check_connection() - with self._lock, self._conn: + with self.lock(), self._conn: for key in tags: self._register_tag(key) @@ -865,7 +881,7 @@ class ResultsDatabase(object): tags = {} self.check_connection() - with self._lock, self._conn: + with self.lock(), self._conn: c = self._conn.execute(sql, args) for row in c: tags[row['key']] = row['tag_id'] @@ -889,7 +905,7 @@ class ResultsDatabase(object): tags = {} self.check_connection() - with self._lock, self._conn: + with self.lock(), self._conn: c = self._conn.execute(sql, args) for row in c: tags[row['key']] = row['value'] @@ -912,7 +928,7 @@ class ResultsDatabase(object): @return: None """ self.check_connection() - with self._lock, self._conn: + with self.lock(), self._conn: for key, value in tags.items(): try: tag_id = self._tags[key] @@ -965,7 +981,7 @@ class ResultsDatabase(object): params = self.query_project_params(project_id, job_id) params.update(self._model_params) param_names = sorted(params, key=lambda s: s.lower()) - with self._lock, self._conn: + with self.lock(), self._conn: if job_id: view_name = "ViewModelsJob{0}".format(job_id) else: @@ -1009,7 +1025,7 @@ class ResultsDatabase(object): @raise KeyError if a parameter hasn't been registered. """ self.check_connection() - with self._lock, self._conn: + with self.lock(), self._conn: # insert model record model_dict = {'job_id': self.job_id, 'gen': None, 'particle': None} model_dict.update(special_params(model_params)) @@ -1036,7 +1052,7 @@ class ResultsDatabase(object): @return None """ self.check_connection() - with self._lock, self._conn: + with self.lock(), self._conn: param_dict = {'model_id': model_id} self._conn.execute(self.sql_delete_model, param_dict) @@ -1048,7 +1064,7 @@ class ResultsDatabase(object): @return: dict """ self.check_connection() - with self._lock, self._conn: + with self.lock(), self._conn: c = self._conn.execute(self.sql_select_paramvalue_model, {'model_id': model_id}) d = {} for row in c: @@ -1084,7 +1100,7 @@ class ResultsDatabase(object): @param filter: list of filter expressions. each expression is a relational expression of the form field operator value, where field is a unique field name of the Projects, Jobs, Models or Results table, e.g. - `job_id`, `model`, `rfac`, `scan`, `sym`, etc. + `job_id`, `model`, `rfac`, `scan`, `domain`, etc. operator is one of the relational operators in SQL syntax. value is a numeric or string constant, the latter including single or double quotes. if the list is empty, no filtering is applied. @@ -1102,7 +1118,7 @@ class ResultsDatabase(object): """ self.check_connection() filter += [" project_id = {0} ".format(self.project_id)] - with self._lock, self._conn: + with self.lock(), self._conn: sql = "select distinct Models.id as model_id, model " sql += "from Models " sql += "join Results on Models.id = Results.model_id " @@ -1147,7 +1163,7 @@ class ResultsDatabase(object): @param filter: list of filter expressions. each expression is a relational expression of the form field operator value, where field is a unique field name of the Projects, Jobs, Models or Results table, e.g. - `job_id`, `model`, `rfac`, `scan`, `sym`, etc. + `job_id`, `model`, `rfac`, `scan`, `domain`, etc. operator is one of the relational operators in SQL syntax. value is a numeric or string constant, the latter including single or double quotes. if the list is empty, no filtering is applied. @@ -1161,9 +1177,9 @@ class ResultsDatabase(object): """ self.check_connection() filter += [" project_id = {0} ".format(self.project_id)] - with self._lock, self._conn: + with self.lock(), self._conn: sql = "select Results.id as result_id, model_id, job_id, " - sql += "model, scan, sym, emit, region, rfac, gen, particle " + sql += "model, scan, domain, emit, region, rfac, gen, particle " sql += "from Models " sql += "join Results on Models.id = Results.model_id " sql += "join Jobs on Models.job_id = Jobs.id " @@ -1172,7 +1188,7 @@ class ResultsDatabase(object): sql += "where " sql += " and ".join(filter) sql += " " - sql += "order by rfac, job_id, model, scan, sym, emit, region " + sql += "order by rfac, job_id, model, scan, domain, emit, region " if limit: sql += "limit {0} ".format(limit) c = self._conn.execute(sql) @@ -1240,7 +1256,7 @@ class ResultsDatabase(object): level_name = dispatch.CALC_LEVELS[4] self.check_connection() - with self._lock, self._conn: + with self.lock(), self._conn: sql = "select Models.id from Models " sql += "join Results on Models.id = Results.model_id " sql += "join Jobs on Models.job_id = Jobs.id " @@ -1250,7 +1266,7 @@ class ResultsDatabase(object): sql += "and Models.job_id in ({0}) ".format(",".join(map(str, job_ids))) sql += "group by Models.job_id " sql += "having min(rfac) " - sql += "order by rfac, job_id, model, scan, sym, emit, region " + sql += "order by rfac, job_id, model, scan, domain, emit, region " c = self._conn.execute(sql) models = [row['id'] for row in c] @@ -1261,7 +1277,7 @@ class ResultsDatabase(object): query the task index used in a calculation job. this query neglects the model index - and returns the unique tuples (-1, scan, sym, emit, region). + and returns the unique tuples (-1, scan, domain, emit, region). @param job_id: (int) id of the associated Jobs entry. if 0, self.job_id is used. @@ -1273,8 +1289,8 @@ class ResultsDatabase(object): job_id = self.job_id self.check_connection() - with self._lock, self._conn: - sql = "select scan, sym, emit, region " + with self.lock(), self._conn: + sql = "select scan, domain, emit, region " sql += "from Models " sql += "join Results on Models.id = Results.model_id " sql += "join Jobs on Models.job_id = Jobs.id " @@ -1323,7 +1339,7 @@ class ResultsDatabase(object): sql += "join Results on Models.id = Results.model_id " sql += "where Models.job_id = :job_id " sql += "and scan = :scan " - sql += "and sym = :sym " + sql += "and domain = :domain " sql += "and emit = :emit " sql += "and region = :region " sql += "order by rfac " @@ -1334,7 +1350,7 @@ class ResultsDatabase(object): tasks = self.query_tasks(job_id) models = set([]) - with self._lock, self._conn: + with self.lock(), self._conn: for task in tasks: if task.numeric_level <= level: d = task._asdict() @@ -1360,7 +1376,7 @@ class ResultsDatabase(object): @param index: (pmsco.dispatch.CalcID or dict) calculation index. in case of dict, the keys must be the attribute names of CalcID prefixed with an underscore, i.e., - '_model', '_scan', '_sym', '_emit', '_region'. + '_model', '_scan', '_domain', '_emit', '_region'. extra values in the dictionary are ignored. undefined indices must be -1. @@ -1377,11 +1393,13 @@ class ResultsDatabase(object): job_id = self.job_id self.check_connection() - with self._lock, self._conn: + with self.lock(), self._conn: model_id = self._insert_result_model(job_id, index, result) result_id = self._insert_result_data(model_id, index, result) self._insert_result_paramvalues(model_id, result) + logger.debug(BMsg("database insert result: job {}, model {}, result {}", job_id, model_id, result_id)) + return result_id def _insert_result_model(self, job_id, index, result): @@ -1402,7 +1420,7 @@ class ResultsDatabase(object): @param index: (pmsco.dispatch.CalcID or dict) calculation index. in case of dict, the keys must be the attribute names of CalcID prefixed with an underscore, i.e., - '_model', '_scan', '_sym', '_emit', '_region'. + '_model', '_scan', '_domain', '_emit', '_region'. extra values in the dictionary are ignored. undefined indices must be -1. @@ -1448,7 +1466,7 @@ class ResultsDatabase(object): @param index: (pmsco.dispatch.CalcID or dict) calculation index. in case of dict, the keys must be the attribute names of CalcID prefixed with an underscore, i.e., - '_model', '_scan', '_sym', '_emit', '_region'. + '_model', '_scan', '_domain', '_emit', '_region'. extra values in the dictionary are ignored. undefined indices must be -1. @param result: (dict) dictionary containing the parameter values and the '_rfac' result. @@ -1525,7 +1543,7 @@ class ResultsDatabase(object): if not job_id: job_id = self.job_id - data = np.genfromtxt(filename, names=True) + data = np.atleast_1d(np.genfromtxt(filename, names=True)) self.register_params(data.dtype.names) try: unique_models, unique_index = np.unique(data['_model'], True) @@ -1552,7 +1570,7 @@ class ResultsDatabase(object): model = unique_models[0] result_entry = {'model_id': model_ids[model], 'scan': -1, - 'sym': -1, + 'domain': -1, 'emit': -1, 'region': -1, 'rfac': None} @@ -1571,7 +1589,7 @@ class ResultsDatabase(object): 'value': value} yield param_entry - with self._lock, self._conn: + with self.lock(), self._conn: c = self._conn.execute(self.sql_select_model_job, {'job_id': job_id}) v = c.fetchone() if v: diff --git a/pmsco/dispatch.py b/pmsco/dispatch.py index db2225c..e46fb86 100644 --- a/pmsco/dispatch.py +++ b/pmsco/dispatch.py @@ -21,6 +21,8 @@ import signal import collections import copy import logging +import math + from attrdict import AttrDict from mpi4py import MPI from pmsco.helpers import BraceMessage as BMsg @@ -53,7 +55,7 @@ TAG_ERROR_ABORTING = 4 ## levels of calculation tasks # -CALC_LEVELS = ('model', 'scan', 'sym', 'emit', 'region') +CALC_LEVELS = ('model', 'scan', 'domain', 'emit', 'region') ## intermediate sub-class of CalcID # @@ -159,13 +161,13 @@ class CalculationTask(object): @arg @c id.model structure number or iteration (handled by the mode module) @arg @c id.scan scan number (handled by the project) - @arg @c id.sym symmetry number (handled by the project) + @arg @c id.domain domain number (handled by the project) @arg @c id.emit emitter number (handled by the project) @arg @c id.region region number (handled by the region handler) specified members must be greater or equal to zero. -1 is the wildcard which is used in parent tasks, - where, e.g., no specific symmetry is chosen. + where, e.g., no specific domain is chosen. the root task has the ID (-1, -1, -1, -1, -1). """ @@ -311,7 +313,8 @@ class CalculationTask(object): format input or output file name including calculation index. @param overrides optional keyword arguments override object fields. - the following keywords are handled: @c root, @c model, @c scan, @c sym, @c emit, @c region, @c ext. + the following keywords are handled: + `root`, `model`, `scan`, `domain`, `emit`, `region`, `ext`. @return a string consisting of the concatenation of the base name, the ID, and the extension. """ @@ -322,7 +325,7 @@ class CalculationTask(object): for key in overrides.keys(): parts[key] = overrides[key] - filename = "{root}_{model}_{scan}_{sym}_{emit}_{region}{ext}".format(**parts) + filename = "{root}_{model}_{scan}_{domain}_{emit}_{region}{ext}".format(**parts) return filename def copy(self): @@ -462,7 +465,7 @@ class CachedCalculationMethod(object): def wrapped_func(inst, model, index): # note: _replace returns a new instance of the namedtuple index = index._replace(emit=-1, region=-1) - cache_index = (id(inst), index.model, index.scan, index.sym) + cache_index = (id(inst), index.model, index.scan, index.domain) try: result = self._cache[cache_index] except KeyError: @@ -565,6 +568,8 @@ class MscoProcess(object): """ clean up after all calculations. + this method must be called after run() has finished. + @return: None """ pass @@ -693,7 +698,7 @@ class MscoProcess(object): parameters generation is delegated to the project's create_params method. @param task: CalculationTask with all attributes set for the calculation. - @return: pmsco.project.Params object for the calculator. + @return: pmsco.project.CalculatorParams object for the calculator. """ par = self._project.create_params(task.model, task.id) @@ -711,7 +716,7 @@ class MscoProcess(object): @param task: CalculationTask with all attributes set for the calculation. - @param par: pmsco.project.Params object for the calculator. + @param par: pmsco.project.CalculatorParams object for the calculator. its phase_files attribute is updated with the created scattering files. the radial matrix elements are not changed (but may be in a future version). @@ -740,7 +745,7 @@ class MscoProcess(object): calculate the multiple scattering intensity. @param task: CalculationTask with all attributes set for the calculation. - @param par: pmsco.project.Params object for the calculator. + @param par: pmsco.project.CalculatorParams object for the calculator. @param clu: pmsco.cluster.Cluster object for the calculator. @return: None """ @@ -820,7 +825,7 @@ class MscoMaster(MscoProcess): ## @var task_handlers # (AttrDict) dictionary of task handler objects # - # the keys are the task levels 'model', 'scan', 'sym', 'emit' and 'region'. + # the keys are the task levels 'model', 'scan', 'domain', 'emit' and 'region'. # the values are handlers.TaskHandler objects. # the objects can be accessed in attribute or dictionary notation. @@ -854,12 +859,18 @@ class MscoMaster(MscoProcess): the method notifies the handlers of the number of available slave processes (slots). some of the tasks handlers adjust their branching according to the number of slots. - this mechanism may be used to balance the load between the task levels. - however, the current implementation is very coarse in this respect. - it advertises all slots to the model handler but a reduced number to the remaining handlers - depending on the operation mode. - the region handler receives a maximum of 4 slots except in single calculation mode. - in single calculation mode, all slots can be used by all handlers. + + this mechanism may be used to adjust the priorities of the task levels, + i.e., whether one slot handles all calculations of one model + so that all models of a generation finish around the same time, + or whether a model is finished completely before the next one is calculated + so that a result is returned as soon as possible. + + the current algorithm tries to pass as many slots as available + down to the lowest level (region) in order to minimize wall time. + the lowest level is restricted to the minimum number of splits + only if the intermediate levels create a lot of branches, + in which case splitting scans would not offer a performance benefit. """ super(MscoMaster, self).setup(project) @@ -869,7 +880,7 @@ class MscoMaster(MscoProcess): self._root_task = CalculationTask() self._root_task.file_root = project.output_file - self._root_task.model = project.create_domain().start + self._root_task.model = project.create_model_space().start for level in self.task_levels: self.task_handlers[level] = project.handler_classes[level]() @@ -877,14 +888,22 @@ class MscoMaster(MscoProcess): self.task_handlers.model.datetime_limit = self.datetime_limit slaves_adj = max(self._slaves, 1) - self.task_handlers.model.setup(project, slaves_adj) - if project.mode != "single": - slaves_adj = max(slaves_adj / 2, 1) - self.task_handlers.scan.setup(project, slaves_adj) - self.task_handlers.sym.setup(project, slaves_adj) - self.task_handlers.emit.setup(project, slaves_adj) - if project.mode != "single": + n_models = self.task_handlers.model.setup(project, slaves_adj) + if n_models > 1: + slaves_adj = max(int(slaves_adj / 2), 1) + n_scans = self.task_handlers.scan.setup(project, slaves_adj) + if n_scans > 1: + slaves_adj = max(int(slaves_adj / 2), 1) + n_doms = self.task_handlers.domain.setup(project, slaves_adj) + if n_doms > 1: + slaves_adj = max(int(slaves_adj / 2), 1) + n_emits = self.task_handlers.emit.setup(project, slaves_adj) + if n_emits > 1: + slaves_adj = max(int(slaves_adj / 2), 1) + n_extra = max(n_scans, n_doms, n_emits) + if n_extra > slaves_adj * 2: slaves_adj = min(slaves_adj, 4) + logger.debug(BMsg("{regions} slots available for region handler", regions=slaves_adj)) self.task_handlers.region.setup(project, slaves_adj) project.setup(self.task_handlers) @@ -911,6 +930,7 @@ class MscoMaster(MscoProcess): else: self._dispatch_tasks() self._receive_result() + self._cleanup_tasks() self._check_finish() logger.debug("master exiting main loop") @@ -918,12 +938,32 @@ class MscoMaster(MscoProcess): self._save_report() def cleanup(self): + """ + clean up after all calculations. + + this method must be called after run() has finished. + + in the master process, this calls cleanup() of each task handler and of the project. + + @return: None + """ logger.debug("master entering cleanup") for level in reversed(self.task_levels): self.task_handlers[level].cleanup() self._project.cleanup() super(MscoMaster, self).cleanup() + def _cleanup_tasks(self): + """ + periodic clean-up in the main loop. + + once per iteration of the main loop, this method cleans up unnecessary files. + this is done by the project's cleanup_files() method. + + @return: None + """ + self._project.cleanup_files() + def _dispatch_results(self): """ pass results through the post-processing modules. @@ -1122,9 +1162,9 @@ class MscoMaster(MscoProcess): scan_tasks = self.task_handlers.scan.create_tasks(task) for scan_task in scan_tasks: - sym_tasks = self.task_handlers.sym.create_tasks(scan_task) - for sym_task in sym_tasks: - emitter_tasks = self.task_handlers.emit.create_tasks(sym_task) + dom_tasks = self.task_handlers.domain.create_tasks(scan_task) + for dom_task in dom_tasks: + emitter_tasks = self.task_handlers.emit.create_tasks(dom_task) for emitter_task in emitter_tasks: region_tasks = self.task_handlers.region.create_tasks(emitter_task) for region_task in region_tasks: diff --git a/pmsco/elements/__init__.py b/pmsco/elements/__init__.py new file mode 100644 index 0000000..43a2718 --- /dev/null +++ b/pmsco/elements/__init__.py @@ -0,0 +1,41 @@ +""" +@package pmsco.elements +extended properties of the elements + +this package extends the element table of the `periodictable` package +(https://periodictable.readthedocs.io/en/latest/index.html) +by additional attributes like the electron binding energies. + +the package requires the periodictable package (https://pypi.python.org/pypi/periodictable). + + +@author Matthias Muntwiler + +@copyright (c) 2020 by Paul Scherrer Institut @n +Licensed under the Apache License, Version 2.0 (the "License"); @n + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 +""" + +import periodictable.core + + +def _load_binding_energy(): + """ + delayed loading of the binding energy table. + """ + from . import bindingenergy + bindingenergy.init(periodictable.core.default_table()) + + +def _load_photoionization(): + """ + delayed loading of the binding energy table. + """ + from . import photoionization + photoionization.init(periodictable.core.default_table()) + + +periodictable.core.delayed_load(['binding_energy'], _load_binding_energy) +periodictable.core.delayed_load(['photoionization'], _load_photoionization) diff --git a/pmsco/elements/bindingenergy.json b/pmsco/elements/bindingenergy.json new file mode 100644 index 0000000..448c060 --- /dev/null +++ b/pmsco/elements/bindingenergy.json @@ -0,0 +1,93 @@ +{ "1": {"1s": 13.6}, + "2": {"1s": 24.6}, + "3": {"1s": 54.7}, + "4": {"1s": 111.5}, + "5": {"1s": 188.0}, + "6": {"1s": 284.2}, + "7": {"1s": 399.6, "2s": 27.0}, + "8": {"1s": 543.1, "2s": 41.6}, + "9": {"1s": 696.7}, + "10": {"1s": 870.2, "2s": 48.5, "2p1/2": 21.7, "2p3/2": 21.6}, + "11": {"1s": 1070.8, "2s": 63.5, "2p1/2": 30.65, "2p3/2": 30.81}, + "12": {"1s": 1303.0, "2s": 88.7, "2p1/2": 49.78, "2p3/2": 49.5}, + "13": {"1s": 1559.6, "2s": 117.8, "2p1/2": 72.95, "2p3/2": 72.55}, + "14": {"1s": 1839.0, "2s": 149.7, "2p1/2": 99.82, "2p3/2": 99.42}, + "15": {"1s": 2145.5, "2s": 189.0, "2p1/2": 136.0, "2p3/2": 135.0}, + "16": {"1s": 2472.0, "2s": 230.9, "2p1/2": 163.6, "2p3/2": 162.5}, + "17": {"1s": 2822.4, "2s": 270.0, "2p1/2": 202.0, "2p3/2": 200.0}, + "18": {"1s": 3205.9, "2s": 326.3, "2p1/2": 250.6, "2p3/2": 248.4, "3s": 29.3, "3p1/2": 15.9, "3p3/2": 15.7}, + "19": {"1s": 3608.4, "2s": 378.6, "2p1/2": 297.3, "2p3/2": 294.6, "3s": 34.8, "3p1/2": 18.3, "3p3/2": 18.3}, + "20": {"1s": 4038.5, "2s": 438.4, "2p1/2": 349.7, "2p3/2": 346.2, "3s": 44.3, "3p1/2": 25.4, "3p3/2": 25.4}, + "21": {"1s": 4492.0, "2s": 498.0, "2p1/2": 403.6, "2p3/2": 398.7, "3s": 51.1, "3p1/2": 28.3, "3p3/2": 28.3}, + "22": {"1s": 4966.0, "2s": 560.9, "2p1/2": 460.2, "2p3/2": 453.8, "3s": 58.7, "3p1/2": 32.6, "3p3/2": 32.6}, + "23": {"1s": 5465.0, "2s": 626.7, "2p1/2": 519.8, "2p3/2": 512.1, "3s": 66.3, "3p1/2": 37.2, "3p3/2": 37.2}, + "24": {"1s": 5989.0, "2s": 696.0, "2p1/2": 583.8, "2p3/2": 574.1, "3s": 74.1, "3p1/2": 42.2, "3p3/2": 42.2}, + "25": {"1s": 6539.0, "2s": 769.1, "2p1/2": 649.9, "2p3/2": 638.7, "3s": 82.3, "3p1/2": 47.2, "3p3/2": 47.2}, + "26": {"1s": 7112.0, "2s": 844.6, "2p1/2": 719.9, "2p3/2": 706.8, "3s": 91.3, "3p1/2": 52.7, "3p3/2": 52.7}, + "27": {"1s": 7709.0, "2s": 925.1, "2p1/2": 793.2, "2p3/2": 778.1, "3s": 101.0, "3p1/2": 58.9, "3p3/2": 59.9}, + "28": {"1s": 8333.0, "2s": 1008.6, "2p1/2": 870.0, "2p3/2": 852.7, "3s": 110.8, "3p1/2": 68.0, "3p3/2": 66.2}, + "29": {"1s": 8979.0, "2s": 1096.7, "2p1/2": 952.3, "2p3/2": 932.7, "3s": 122.5, "3p1/2": 77.3, "3p3/2": 75.1}, + "30": {"1s": 9659.0, "2s": 1196.2, "2p1/2": 1044.9, "2p3/2": 1021.8, "3s": 139.8, "3p1/2": 91.4, "3p3/2": 88.6, "3d3/2": 10.2, "3d5/2": 10.1}, + "31": {"1s": 10367.0, "2s": 1299.0, "2p1/2": 1143.2, "2p3/2": 1116.4, "3s": 159.5, "3p1/2": 103.5, "3p3/2": 100.0, "3d3/2": 18.7, "3d5/2": 18.7}, + "32": {"1s": 11103.0, "2s": 1414.6, "2p1/2": 1248.1, "2p3/2": 1217.0, "3s": 180.1, "3p1/2": 124.9, "3p3/2": 120.8, "3d3/2": 29.8, "3d5/2": 29.2}, + "33": {"1s": 11867.0, "2s": 1527.0, "2p1/2": 1359.1, "2p3/2": 1323.6, "3s": 204.7, "3p1/2": 146.2, "3p3/2": 141.2, "3d3/2": 41.7, "3d5/2": 41.7}, + "34": {"1s": 12658.0, "2s": 1652.0, "2p1/2": 1474.3, "2p3/2": 1433.9, "3s": 229.6, "3p1/2": 166.5, "3p3/2": 160.7, "3d3/2": 55.5, "3d5/2": 54.6}, + "35": {"1s": 13474.0, "2s": 1782.0, "2p1/2": 1596.0, "2p3/2": 1550.0, "3s": 257.0, "3p1/2": 189.0, "3p3/2": 182.0, "3d3/2": 70.0, "3d5/2": 69.0}, + "36": {"1s": 14326.0, "2s": 1921.0, "2p1/2": 1730.9, "2p3/2": 1678.4, "3s": 292.8, "3p1/2": 222.2, "3p3/2": 214.4, "3d3/2": 95.0, "3d5/2": 93.8, "4s": 27.5, "4p1/2": 14.1, "4p3/2": 14.1}, + "37": {"1s": 15200.0, "2s": 2065.0, "2p1/2": 1864.0, "2p3/2": 1804.0, "3s": 326.7, "3p1/2": 248.7, "3p3/2": 239.1, "3d3/2": 113.0, "3d5/2": 112.0, "4s": 30.5, "4p1/2": 16.3, "4p3/2": 15.3}, + "38": {"1s": 16105.0, "2s": 2216.0, "2p1/2": 2007.0, "2p3/2": 1940.0, "3s": 358.7, "3p1/2": 280.3, "3p3/2": 270.0, "3d3/2": 136.0, "3d5/2": 134.2, "4s": 38.9, "4p1/2": 21.3, "4p3/2": 20.1}, + "39": {"1s": 17038.0, "2s": 2373.0, "2p1/2": 2156.0, "2p3/2": 2080.0, "3s": 392.0, "3p1/2": 310.6, "3p3/2": 298.8, "3d3/2": 157.7, "3d5/2": 155.8, "4s": 43.8, "4p1/2": 24.4, "4p3/2": 23.1}, + "40": {"1s": 17998.0, "2s": 2532.0, "2p1/2": 2307.0, "2p3/2": 2223.0, "3s": 430.3, "3p1/2": 343.5, "3p3/2": 329.8, "3d3/2": 181.1, "3d5/2": 178.8, "4s": 50.6, "4p1/2": 28.5, "4p3/2": 27.1}, + "41": {"1s": 18986.0, "2s": 2698.0, "2p1/2": 2465.0, "2p3/2": 2371.0, "3s": 466.6, "3p1/2": 376.1, "3p3/2": 360.6, "3d3/2": 205.0, "3d5/2": 202.3, "4s": 56.4, "4p1/2": 32.6, "4p3/2": 30.8}, + "42": {"1s": 20000.0, "2s": 2866.0, "2p1/2": 2625.0, "2p3/2": 2520.0, "3s": 506.3, "3p1/2": 411.6, "3p3/2": 394.0, "3d3/2": 231.1, "3d5/2": 227.9, "4s": 63.2, "4p1/2": 37.6, "4p3/2": 35.5}, + "43": {"1s": 21044.0, "2s": 3043.0, "2p1/2": 2793.0, "2p3/2": 2677.0, "3s": 544.0, "3p1/2": 447.6, "3p3/2": 417.7, "3d3/2": 257.6, "3d5/2": 253.9, "4s": 69.5, "4p1/2": 42.3, "4p3/2": 39.9}, + "44": {"1s": 22117.0, "2s": 3224.0, "2p1/2": 2967.0, "2p3/2": 2838.0, "3s": 586.1, "3p1/2": 483.5, "3p3/2": 461.4, "3d3/2": 284.2, "3d5/2": 280.0, "4s": 75.0, "4p1/2": 46.3, "4p3/2": 43.2}, + "45": {"1s": 23220.0, "2s": 3412.0, "2p1/2": 3146.0, "2p3/2": 3004.0, "3s": 628.1, "3p1/2": 521.3, "3p3/2": 496.5, "3d3/2": 311.9, "3d5/2": 307.2, "4s": 81.4, "4p1/2": 50.5, "4p3/2": 47.3}, + "46": {"1s": 24350.0, "2s": 3604.0, "2p1/2": 3330.0, "2p3/2": 3173.0, "3s": 671.6, "3p1/2": 559.9, "3p3/2": 532.3, "3d3/2": 340.5, "3d5/2": 335.2, "4s": 87.1, "4p1/2": 55.7, "4p3/2": 50.9}, + "47": {"1s": 25514.0, "2s": 3806.0, "2p1/2": 3524.0, "2p3/2": 3351.0, "3s": 719.0, "3p1/2": 603.8, "3p3/2": 573.0, "3d3/2": 374.0, "3d5/2": 368.3, "4s": 97.0, "4p1/2": 63.7, "4p3/2": 58.3}, + "48": {"1s": 26711.0, "2s": 4018.0, "2p1/2": 3727.0, "2p3/2": 3538.0, "3s": 772.0, "3p1/2": 652.6, "3p3/2": 618.4, "3d3/2": 411.9, "3d5/2": 405.2, "4s": 109.8, "4p1/2": 63.9, "4p3/2": 63.9, "4d3/2": 11.7, "4d5/2": 10.7}, + "49": {"1s": 27940.0, "2s": 4238.0, "2p1/2": 3938.0, "2p3/2": 3730.0, "3s": 827.2, "3p1/2": 703.2, "3p3/2": 665.3, "3d3/2": 451.4, "3d5/2": 443.9, "4s": 122.9, "4p1/2": 73.5, "4p3/2": 73.5, "4d3/2": 17.7, "4d5/2": 16.9}, + "50": {"1s": 29200.0, "2s": 4465.0, "2p1/2": 4156.0, "2p3/2": 3929.0, "3s": 884.7, "3p1/2": 756.5, "3p3/2": 714.6, "3d3/2": 493.2, "3d5/2": 484.9, "4s": 137.1, "4p1/2": 83.6, "4p3/2": 83.6, "4d3/2": 24.9, "4d5/2": 23.9}, + "51": {"1s": 30491.0, "2s": 4698.0, "2p1/2": 4380.0, "2p3/2": 4132.0, "3s": 946.0, "3p1/2": 812.7, "3p3/2": 766.4, "3d3/2": 537.5, "3d5/2": 528.2, "4s": 153.2, "4p1/2": 95.6, "4p3/2": 95.6, "4d3/2": 33.3, "4d5/2": 32.1}, + "52": {"1s": 31814.0, "2s": 4939.0, "2p1/2": 4612.0, "2p3/2": 4341.0, "3s": 1006.0, "3p1/2": 870.8, "3p3/2": 820.0, "3d3/2": 583.4, "3d5/2": 573.0, "4s": 169.4, "4p1/2": 103.3, "4p3/2": 103.3, "4d3/2": 41.9, "4d5/2": 40.4}, + "53": {"1s": 33169.0, "2s": 5188.0, "2p1/2": 4852.0, "2p3/2": 4557.0, "3s": 1072.0, "3p1/2": 931.0, "3p3/2": 875.0, "3d3/2": 630.8, "3d5/2": 619.3, "4s": 186.0, "4p1/2": 123.0, "4p3/2": 123.0, "4d3/2": 50.6, "4d5/2": 48.9}, + "54": {"1s": 34561.0, "2s": 5453.0, "2p1/2": 5107.0, "2p3/2": 4786.0, "3s": 1148.7, "3p1/2": 1002.1, "3p3/2": 940.6, "3d3/2": 689.0, "3d5/2": 676.4, "4s": 213.2, "4p1/2": 146.7, "4p3/2": 145.5, "4d3/2": 69.5, "4d5/2": 67.5, "5s": 23.3, "5p1/2": 13.4, "5p3/2": 12.1}, + "55": {"1s": 35985.0, "2s": 5714.0, "2p1/2": 5359.0, "2p3/2": 5012.0, "3s": 1211.0, "3p1/2": 1071.0, "3p3/2": 1003.0, "3d3/2": 740.5, "3d5/2": 726.6, "4s": 232.3, "4p1/2": 172.4, "4p3/2": 161.3, "4d3/2": 79.8, "4d5/2": 77.5, "5s": 22.7, "5p1/2": 14.2, "5p3/2": 12.1}, + "56": {"1s": 37441.0, "2s": 5989.0, "2p1/2": 5624.0, "2p3/2": 5247.0, "3s": 1293.0, "3p1/2": 1137.0, "3p3/2": 1063.0, "3d3/2": 795.7, "3d5/2": 780.5, "4s": 253.5, "4p1/2": 192.0, "4p3/2": 178.6, "4d3/2": 92.6, "4d5/2": 89.9, "5s": 30.3, "5p1/2": 17.0, "5p3/2": 14.8}, + "57": {"1s": 38925.0, "2s": 6266.0, "2p1/2": 5891.0, "2p3/2": 5483.0, "3s": 1362.0, "3p1/2": 1209.0, "3p3/2": 1128.0, "3d3/2": 853.0, "3d5/2": 836.0, "4s": 274.7, "4p1/2": 205.8, "4p3/2": 196.0, "4d3/2": 105.3, "4d5/2": 102.5, "5s": 34.3, "5p1/2": 19.3, "5p3/2": 16.8}, + "58": {"1s": 40443.0, "2s": 6549.0, "2p1/2": 6164.0, "2p3/2": 5723.0, "3s": 1436.0, "3p1/2": 1274.0, "3p3/2": 1187.0, "3d3/2": 902.4, "3d5/2": 883.8, "4s": 291.0, "4p1/2": 223.2, "4p3/2": 206.5, "4d3/2": 109.0, "4f5/2": 0.1, "4f7/2": 0.1, "5s": 37.8, "5p1/2": 19.8, "5p3/2": 17.0}, + "59": {"1s": 41991.0, "2s": 6835.0, "2p1/2": 6440.0, "2p3/2": 5964.0, "3s": 1511.0, "3p1/2": 1337.0, "3p3/2": 1242.0, "3d3/2": 948.3, "3d5/2": 928.8, "4s": 304.5, "4p1/2": 236.3, "4p3/2": 217.6, "4d3/2": 115.1, "4d5/2": 115.1, "4f5/2": 2.0, "4f7/2": 2.0, "5s": 37.4, "5p1/2": 22.3, "5p3/2": 22.3}, + "60": {"1s": 43569.0, "2s": 7126.0, "2p1/2": 6722.0, "2p3/2": 6208.0, "3s": 1575.0, "3p1/2": 1403.0, "3p3/2": 1297.0, "3d3/2": 1003.3, "3d5/2": 980.4, "4s": 319.2, "4p1/2": 243.3, "4p3/2": 224.6, "4d3/2": 120.5, "4d5/2": 120.5, "4f5/2": 1.5, "4f7/2": 1.5, "5s": 37.5, "5p1/2": 21.1, "5p3/2": 21.1}, + "61": {"1s": 45184.0, "2s": 7428.0, "2p1/2": 7013.0, "2p3/2": 6459.0, "3p1/2": 1471.0, "3p3/2": 1357.0, "3d3/2": 1052.0, "3d5/2": 1027.0, "4p1/2": 242.0, "4p3/2": 242.0, "4d3/2": 120.0, "4d5/2": 120.0}, + "62": {"1s": 46834.0, "2s": 7737.0, "2p1/2": 7312.0, "2p3/2": 6716.0, "3s": 1723.0, "3p1/2": 1541.0, "3p3/2": 1420.0, "3d3/2": 1110.9, "3d5/2": 1083.4, "4s": 347.2, "4p1/2": 265.6, "4p3/2": 247.4, "4d3/2": 129.0, "4d5/2": 129.0, "4f5/2": 5.2, "4f7/2": 5.2, "5s": 37.4, "5p1/2": 21.3, "5p3/2": 21.3}, + "63": {"1s": 48519.0, "2s": 8052.0, "2p1/2": 7617.0, "2p3/2": 6977.0, "3s": 1800.0, "3p1/2": 1614.0, "3p3/2": 1481.0, "3d3/2": 1158.6, "3d5/2": 1127.5, "4s": 360.0, "4p1/2": 284.0, "4p3/2": 257.0, "4d3/2": 133.0, "4d5/2": 127.7, "4f5/2": 0.0, "4f7/2": 0.0, "5s": 32.0, "5p1/2": 22.0, "5p3/2": 22.0}, + "64": {"1s": 50239.0, "2s": 8376.0, "2p1/2": 7930.0, "2p3/2": 7243.0, "3s": 1881.0, "3p1/2": 1688.0, "3p3/2": 1544.0, "3d3/2": 1221.9, "3d5/2": 1189.6, "4s": 378.6, "4p1/2": 286.0, "4p3/2": 271.0, "4d5/2": 142.6, "4f5/2": 8.6, "4f7/2": 8.6, "5s": 36.0, "5p1/2": 28.0, "5p3/2": 21.0}, + "65": {"1s": 51996.0, "2s": 8708.0, "2p1/2": 8252.0, "2p3/2": 7514.0, "3s": 1968.0, "3p1/2": 1768.0, "3p3/2": 1611.0, "3d3/2": 1276.9, "3d5/2": 1241.1, "4s": 396.0, "4p1/2": 322.4, "4p3/2": 284.1, "4d3/2": 150.5, "4d5/2": 150.5, "4f5/2": 7.7, "4f7/2": 2.4, "5s": 45.6, "5p1/2": 28.7, "5p3/2": 22.6}, + "66": {"1s": 53789.0, "2s": 9046.0, "2p1/2": 8581.0, "2p3/2": 7790.0, "3s": 2047.0, "3p1/2": 1842.0, "3p3/2": 1676.0, "3d3/2": 1333.0, "3d5/2": 1292.6, "4s": 414.2, "4p1/2": 333.5, "4p3/2": 293.2, "4d3/2": 153.6, "4d5/2": 153.6, "4f5/2": 8.0, "4f7/2": 4.3, "5s": 49.9, "5p1/2": 26.3, "5p3/2": 26.3}, + "67": {"1s": 55618.0, "2s": 9394.0, "2p1/2": 8918.0, "2p3/2": 8071.0, "3s": 2128.0, "3p1/2": 1923.0, "3p3/2": 1741.0, "3d3/2": 1392.0, "3d5/2": 1351.0, "4s": 432.4, "4p1/2": 343.5, "4p3/2": 308.2, "4d3/2": 160.0, "4d5/2": 160.0, "4f5/2": 8.6, "4f7/2": 5.2, "5s": 49.3, "5p1/2": 30.8, "5p3/2": 24.1}, + "68": {"1s": 57486.0, "2s": 9751.0, "2p1/2": 9264.0, "2p3/2": 8358.0, "3s": 2207.0, "3p1/2": 2006.0, "3p3/2": 1812.0, "3d3/2": 1453.0, "3d5/2": 1409.0, "4s": 449.8, "4p1/2": 366.2, "4p3/2": 320.2, "4d3/2": 167.6, "4d5/2": 167.6, "4f7/2": 4.7, "5s": 50.6, "5p1/2": 31.4, "5p3/2": 24.7}, + "69": {"1s": 59390.0, "2s": 10116.0, "2p1/2": 9617.0, "2p3/2": 8648.0, "3s": 2307.0, "3p1/2": 2090.0, "3p3/2": 1885.0, "3d3/2": 1515.0, "3d5/2": 1468.0, "4s": 470.9, "4p1/2": 385.9, "4p3/2": 332.6, "4d3/2": 175.5, "4d5/2": 175.5, "4f7/2": 4.6, "5s": 54.7, "5p1/2": 31.8, "5p3/2": 25.0}, + "70": {"1s": 61332.0, "2s": 10486.0, "2p1/2": 9978.0, "2p3/2": 8944.0, "3s": 2398.0, "3p1/2": 2173.0, "3p3/2": 1950.0, "3d3/2": 1576.0, "3d5/2": 1528.0, "4s": 480.5, "4p1/2": 388.7, "4p3/2": 339.7, "4d3/2": 191.2, "4d5/2": 182.4, "4f5/2": 2.5, "4f7/2": 1.3, "5s": 52.0, "5p1/2": 30.3, "5p3/2": 24.1}, + "71": {"1s": 63314.0, "2s": 10870.0, "2p1/2": 10349.0, "2p3/2": 9244.0, "3s": 2491.0, "3p1/2": 2264.0, "3p3/2": 2024.0, "3d3/2": 1639.0, "3d5/2": 1589.0, "4s": 506.8, "4p1/2": 412.4, "4p3/2": 359.2, "4d3/2": 206.1, "4d5/2": 196.3, "4f5/2": 8.9, "4f7/2": 7.5, "5s": 57.3, "5p1/2": 33.6, "5p3/2": 26.7}, + "72": {"1s": 65351.0, "2s": 11271.0, "2p1/2": 10739.0, "2p3/2": 9561.0, "3s": 2601.0, "3p1/2": 2365.0, "3p3/2": 2108.0, "3d3/2": 1716.0, "3d5/2": 1662.0, "4s": 538.0, "4p1/2": 438.2, "4p3/2": 380.7, "4d3/2": 220.0, "4d5/2": 211.5, "4f5/2": 15.9, "4f7/2": 14.2, "5s": 64.2, "5p1/2": 38.0, "5p3/2": 29.9}, + "73": {"1s": 67416.0, "2s": 11682.0, "2p1/2": 11136.0, "2p3/2": 9881.0, "3s": 2708.0, "3p1/2": 2469.0, "3p3/2": 2194.0, "3d3/2": 1793.0, "3d5/2": 1735.0, "4s": 563.4, "4p1/2": 463.4, "4p3/2": 400.9, "4d3/2": 237.9, "4d5/2": 226.4, "4f5/2": 23.5, "4f7/2": 21.6, "5s": 69.7, "5p1/2": 42.2, "5p3/2": 32.7}, + "74": {"1s": 69525.0, "2s": 12100.0, "2p1/2": 11544.0, "2p3/2": 10207.0, "3s": 2820.0, "3p1/2": 2575.0, "3p3/2": 2281.0, "3d3/2": 1872.0, "3d5/2": 1809.0, "4s": 594.1, "4p1/2": 490.4, "4p3/2": 423.6, "4d3/2": 255.9, "4d5/2": 243.5, "4f5/2": 33.6, "4f7/2": 31.4, "5s": 75.6, "5p1/2": 45.3, "5p3/2": 36.8}, + "75": {"1s": 71676.0, "2s": 12527.0, "2p1/2": 11959.0, "2p3/2": 10535.0, "3s": 2932.0, "3p1/2": 2682.0, "3p3/2": 2367.0, "3d3/2": 1949.0, "3d5/2": 1883.0, "4s": 625.4, "4p1/2": 518.7, "4p3/2": 446.8, "4d3/2": 273.9, "4d5/2": 260.5, "4f5/2": 42.9, "4f7/2": 40.5, "5s": 83.0, "5p1/2": 45.6, "5p3/2": 34.6}, + "76": {"1s": 73871.0, "2s": 12968.0, "2p1/2": 12385.0, "2p3/2": 10871.0, "3s": 3049.0, "3p1/2": 2792.0, "3p3/2": 2457.0, "3d3/2": 2031.0, "3d5/2": 1960.0, "4s": 658.2, "4p1/2": 549.1, "4p3/2": 470.7, "4d3/2": 293.1, "4d5/2": 278.5, "4f5/2": 53.4, "4f7/2": 50.7, "5s": 84.0, "5p1/2": 58.0, "5p3/2": 44.5}, + "77": {"1s": 76111.0, "2s": 13419.0, "2p1/2": 12824.0, "2p3/2": 11215.0, "3s": 3174.0, "3p1/2": 2909.0, "3p3/2": 2551.0, "3d3/2": 2116.0, "3d5/2": 2040.0, "4s": 691.1, "4p1/2": 577.8, "4p3/2": 495.8, "4d3/2": 311.9, "4d5/2": 296.3, "4f5/2": 63.8, "4f7/2": 60.8, "5s": 95.2, "5p1/2": 63.0, "5p3/2": 48.0}, + "78": {"1s": 78395.0, "2s": 13880.0, "2p1/2": 13273.0, "2p3/2": 11564.0, "3s": 3296.0, "3p1/2": 3027.0, "3p3/2": 2645.0, "3d3/2": 2202.0, "3d5/2": 2122.0, "4s": 725.4, "4p1/2": 609.1, "4p3/2": 519.4, "4d3/2": 331.6, "4d5/2": 314.6, "4f5/2": 74.5, "4f7/2": 71.2, "5s": 101.7, "5p1/2": 65.3, "5p3/2": 51.7}, + "79": {"1s": 80725.0, "2s": 14353.0, "2p1/2": 13734.0, "2p3/2": 11919.0, "3s": 3425.0, "3p1/2": 3148.0, "3p3/2": 2743.0, "3d3/2": 2291.0, "3d5/2": 2206.0, "4s": 762.1, "4p1/2": 642.7, "4p3/2": 546.3, "4d3/2": 353.2, "4d5/2": 335.1, "4f5/2": 87.6, "4f7/2": 84.0, "5s": 107.2, "5p1/2": 74.2, "5p3/2": 57.2}, + "80": {"1s": 83102.0, "2s": 14839.0, "2p1/2": 14209.0, "2p3/2": 12284.0, "3s": 3562.0, "3p1/2": 3279.0, "3p3/2": 2847.0, "3d3/2": 2385.0, "3d5/2": 2295.0, "4s": 802.2, "4p1/2": 680.2, "4p3/2": 576.6, "4d3/2": 378.2, "4d5/2": 358.8, "4f5/2": 104.0, "4f7/2": 99.9, "5s": 127.0, "5p1/2": 83.1, "5p3/2": 64.5, "5d3/2": 9.6, "5d5/2": 7.8}, + "81": {"1s": 85530.0, "2s": 15347.0, "2p1/2": 14698.0, "2p3/2": 12658.0, "3s": 3704.0, "3p1/2": 3416.0, "3p3/2": 2957.0, "3d3/2": 2485.0, "3d5/2": 2389.0, "4s": 846.2, "4p1/2": 720.5, "4p3/2": 609.5, "4d3/2": 405.7, "4d5/2": 385.0, "4f5/2": 122.2, "4f7/2": 117.8, "5s": 136.0, "5p1/2": 94.6, "5p3/2": 73.5, "5d3/2": 14.7, "5d5/2": 12.5}, + "82": {"1s": 88005.0, "2s": 15861.0, "2p1/2": 15200.0, "2p3/2": 13035.0, "3s": 3851.0, "3p1/2": 3554.0, "3p3/2": 3066.0, "3d3/2": 2586.0, "3d5/2": 2484.0, "4s": 891.8, "4p1/2": 761.9, "4p3/2": 643.5, "4d3/2": 434.3, "4d5/2": 412.2, "4f5/2": 141.7, "4f7/2": 136.9, "5s": 147.0, "5p1/2": 106.4, "5p3/2": 83.3, "5d3/2": 20.7, "5d5/2": 18.1}, + "83": {"1s": 90526.0, "2s": 16388.0, "2p1/2": 15711.0, "2p3/2": 13419.0, "3s": 3999.0, "3p1/2": 3696.0, "3p3/2": 3177.0, "3d3/2": 2688.0, "3d5/2": 2580.0, "4s": 939.0, "4p1/2": 805.2, "4p3/2": 678.8, "4d3/2": 464.0, "4d5/2": 440.1, "4f5/2": 162.3, "4f7/2": 157.0, "5s": 159.3, "5p1/2": 119.0, "5p3/2": 92.6, "5d3/2": 26.9, "5d5/2": 23.8}, + "84": {"1s": 93105.0, "2s": 16939.0, "2p1/2": 16244.0, "2p3/2": 13814.0, "3s": 4149.0, "3p1/2": 3854.0, "3p3/2": 3302.0, "3d3/2": 2798.0, "3d5/2": 2683.0, "4s": 995.0, "4p1/2": 851.0, "4p3/2": 705.0, "4d3/2": 500.0, "4d5/2": 473.0, "4f5/2": 184.0, "4f7/2": 184.0, "5s": 177.0, "5p1/2": 132.0, "5p3/2": 104.0, "5d3/2": 31.0, "5d5/2": 31.0}, + "85": {"1s": 95730.0, "2s": 17493.0, "2p1/2": 16785.0, "2p3/2": 14214.0, "3s": 4317.0, "3p1/2": 4008.0, "3p3/2": 3426.0, "3d3/2": 2909.0, "3d5/2": 2787.0, "4s": 1042.0, "4p1/2": 886.0, "4p3/2": 740.0, "4d3/2": 533.0, "4d5/2": 507.0, "4f5/2": 210.0, "4f7/2": 210.0, "5s": 195.0, "5p1/2": 148.0, "5p3/2": 115.0, "5d3/2": 40.0, "5d5/2": 40.0}, + "86": {"1s": 98404.0, "2s": 18049.0, "2p1/2": 17337.0, "2p3/2": 14619.0, "3s": 4482.0, "3p1/2": 4159.0, "3p3/2": 3538.0, "3d3/2": 3022.0, "3d5/2": 2892.0, "4s": 1097.0, "4p1/2": 929.0, "4p3/2": 768.0, "4d3/2": 567.0, "4d5/2": 541.0, "4f5/2": 238.0, "4f7/2": 238.0, "5s": 214.0, "5p1/2": 164.0, "5p3/2": 127.0, "5d3/2": 48.0, "5d5/2": 48.0, "6s": 26.0}, + "87": {"1s": 101137.0, "2s": 18639.0, "2p1/2": 17907.0, "2p3/2": 15031.0, "3s": 4652.0, "3p1/2": 4327.0, "3p3/2": 3663.0, "3d3/2": 3136.0, "3d5/2": 3000.0, "4s": 1153.0, "4p1/2": 980.0, "4p3/2": 810.0, "4d3/2": 603.0, "4d5/2": 577.0, "4f5/2": 268.0, "4f7/2": 268.0, "5s": 234.0, "5p1/2": 182.0, "5p3/2": 140.0, "5d3/2": 58.0, "5d5/2": 58.0, "6s": 34.0, "6p1/2": 15.0, "6p3/2": 15.0}, + "88": {"1s": 103922.0, "2s": 19237.0, "2p1/2": 18484.0, "2p3/2": 15444.0, "3s": 4822.0, "3p1/2": 4490.0, "3p3/2": 3792.0, "3d3/2": 3248.0, "3d5/2": 3105.0, "4s": 1208.0, "4p1/2": 1058.0, "4p3/2": 879.0, "4d3/2": 636.0, "4d5/2": 603.0, "4f5/2": 299.0, "4f7/2": 299.0, "5s": 254.0, "5p1/2": 200.0, "5p3/2": 153.0, "5d3/2": 68.0, "5d5/2": 68.0, "6s": 44.0, "6p1/2": 19.0, "6p3/2": 19.0}, + "89": {"1s": 106755.0, "2s": 19840.0, "2p1/2": 19083.0, "2p3/2": 15871.0, "3s": 5002.0, "3p1/2": 4656.0, "3p3/2": 3909.0, "3d3/2": 3370.0, "3d5/2": 3219.0, "4s": 1269.0, "4p1/2": 1080.0, "4p3/2": 890.0, "4d3/2": 675.0, "4d5/2": 639.0, "4f5/2": 319.0, "4f7/2": 319.0, "5s": 272.0, "5p1/2": 215.0, "5p3/2": 167.0, "5d3/2": 80.0, "5d5/2": 80.0}, + "90": {"1s": 109651.0, "2s": 20472.0, "2p1/2": 19693.0, "2p3/2": 16300.0, "3s": 5182.0, "3p1/2": 4830.0, "3p3/2": 4046.0, "3d3/2": 3491.0, "3d5/2": 3332.0, "4s": 1330.0, "4p1/2": 1168.0, "4p3/2": 966.4, "4d3/2": 712.1, "4d5/2": 675.2, "4f5/2": 342.4, "4f7/2": 333.1, "5s": 290.0, "5p1/2": 229.0, "5p3/2": 182.0, "5d3/2": 92.5, "5d5/2": 85.4, "6s": 41.4, "6p1/2": 24.5, "6p3/2": 16.6}, + "91": {"1s": 112601.0, "2s": 21105.0, "2p1/2": 20314.0, "2p3/2": 16733.0, "3s": 5367.0, "3p1/2": 5001.0, "3p3/2": 4174.0, "3d3/2": 3611.0, "3d5/2": 3442.0, "4s": 1387.0, "4p1/2": 1224.0, "4p3/2": 1007.0, "4d3/2": 743.0, "4d5/2": 708.0, "4f5/2": 371.0, "4f7/2": 360.0, "5s": 310.0, "5p1/2": 232.0, "5p3/2": 232.0, "5d3/2": 94.0, "5d5/2": 94.0}, + "92": {"1s": 115606.0, "2s": 21757.0, "2p1/2": 20948.0, "2p3/2": 17166.0, "3s": 5548.0, "3p1/2": 5182.0, "3p3/2": 4303.0, "3d3/2": 3728.0, "3d5/2": 3552.0, "4s": 1439.0, "4p1/2": 1271.0, "4p3/2": 1043.0, "4d3/2": 778.3, "4d5/2": 736.2, "4f5/2": 388.2, "4f7/2": 377.4, "5s": 321.0, "5p1/2": 257.0, "5p3/2": 192.0, "5d3/2": 102.8, "5d5/2": 94.2, "6s": 43.9, "6p1/2": 26.8, "6p3/2": 16.8} +} diff --git a/pmsco/elements/bindingenergy.py b/pmsco/elements/bindingenergy.py new file mode 100644 index 0000000..c1ca732 --- /dev/null +++ b/pmsco/elements/bindingenergy.py @@ -0,0 +1,155 @@ +""" +@package pmsco.elements.bindingenergy +electron binding energies of the elements + +extends the element table of the `periodictable` package +(https://periodictable.readthedocs.io/en/latest/index.html) +by the electron binding energies. + +the binding energies are compiled from Gwyn Williams' web page +(https://userweb.jlab.org/~gwyn/ebindene.html). +please refer to the original web page or the x-ray data booklet +for original sources, definitions and remarks. + +usage +----- + +this module requires the periodictable package (https://pypi.python.org/pypi/periodictable). + +~~~~~~{.py} +import periodictable as pt +import pmsco.elements.bindingenergy + +# read any periodictable's element interfaces, e.g. +print(pt.gold.binding_energy['4f7/2']) +print(pt.elements.symbol('Au').binding_energy['4f7/2']) +print(pt.elements.name('gold').binding_energy['4f7/2']) +print(pt.elements[79].binding_energy['4f7/2']) +~~~~~~ + +note that attributes are writable. +you may assign refined values in your instance of the database. + +the query_binding_energy() function queries all terms with a particular binding energy. + + +@author Matthias Muntwiler + +@copyright (c) 2020 by Paul Scherrer Institut @n +Licensed under the Apache License, Version 2.0 (the "License"); @n + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 +""" + +import json +import numpy as np +import os +import periodictable as pt +from pmsco.compat import open + + +index_energy = np.zeros(0) +index_number = np.zeros(0) +index_term = [] + + +def load_data(): + data_path = os.path.join(os.path.dirname(__file__), "bindingenergy.json") + with open(data_path) as fp: + data = json.load(fp) + return data + + +def init(table, reload=False): + if 'binding_energy' in table.properties and not reload: + return + table.properties.append('binding_energy') + + pt.core.Element.binding_energy = {} + pt.core.Element.binding_energy_units = "eV" + + data = load_data() + for el_key, el_data in data.items(): + try: + el = table[int(el_key)] + except ValueError: + el = table.symbol(el_key) + el.binding_energy = el_data + + +def build_index(): + """ + build an index for query_binding_energy(). + + the index is kept in global variables of the module. + + @return None + """ + global index_energy + global index_number + global index_term + + n = 0 + for element in pt.elements: + n += len(element.binding_energy) + + index_energy = np.zeros(n) + index_number = np.zeros(n) + index_term = [] + + for element in pt.elements: + for term, energy in element.binding_energy.items(): + index_term.append(term) + i = len(index_term) - 1 + index_energy[i] = energy + index_number[i] = element.number + + +def query_binding_energy(energy, tol=1.0): + """ + search the periodic table for a specific binding energy and return all matching terms. + + @param energy: binding energy in eV. + + @param tol: tolerance in eV. + + @return: list of dictionaries containing element and term specification. + the list is ordered arbitrarily. + each dictionary contains the following keys: + @arg 'number': element number + @arg 'symbol': element symbol + @arg 'term': spectroscopic term + @arg 'energy': actual binding energy + """ + if len(index_energy) == 0: + build_index() + sel = np.abs(index_energy - energy) < tol + idx = np.where(sel) + result = [] + for i in idx[0]: + el_num = int(index_number[i]) + d = {'number': el_num, + 'symbol': pt.elements[el_num].symbol, + 'term': index_term[i], + 'energy': index_energy[i]} + result.append(d) + + return result + + +def export_flat_text(f): + """ + export the binding energies to a flat general text file. + + @param f: file path or open file object + @return: None + """ + if hasattr(f, "write") and callable(f.write): + f.write("number symbol term energy\n") + for element in pt.elements: + for term, energy in element.binding_energy.items(): + f.write(f"{element.number} {element.symbol} {term} {energy}\n") + else: + with open(f, "w") as fi: + export_flat_text(fi) diff --git a/pmsco/elements/cross-sections.dat b/pmsco/elements/cross-sections.dat new file mode 100644 index 0000000..b12e106 Binary files /dev/null and b/pmsco/elements/cross-sections.dat differ diff --git a/pmsco/elements/photoionization.py b/pmsco/elements/photoionization.py new file mode 100644 index 0000000..3a5742b --- /dev/null +++ b/pmsco/elements/photoionization.py @@ -0,0 +1,248 @@ +""" +@package pmsco.elements.photoionization +photoionization cross-sections of the elements + +extends the element table of the `periodictable` package +(https://periodictable.readthedocs.io/en/latest/index.html) +by a table of photoionization cross-sections. + + +the data is available from (https://vuo.elettra.eu/services/elements/) +or (https://figshare.com/articles/dataset/Digitisation_of_Yeh_and_Lindau_Photoionisation_Cross_Section_Tabulated_Data/12389750). +both sources are based on the original atomic data tables by Yeh and Lindau (1985). +the Elettra data includes interpolation at finer steps, +whereas the Kalha data contains only the original data points by Yeh and Lindau +plus an additional point at 8 keV. +the tables go up to 1500 eV photon energy and do not resolve spin-orbit splitting. + + +usage +----- + +this module requires python 3.6, numpy and the periodictable package (https://pypi.python.org/pypi/periodictable). + +~~~~~~{.py} +import numpy as np +import periodictable as pt +import pmsco.elements.photoionization + +# read any periodictable's element interfaces as follows. +# eph and cs are numpy arrays of identical shape that hold the photon energies and cross sections. +eph, cs = pt.gold.photoionization.cross_section['4f'] +eph, cs = pt.elements.symbol('Au').photoionization.cross_section['4f'] +eph, cs = pt.elements.name('gold').photoionization.cross_section['4f'] +eph, cs = pt.elements[79].photoionization.cross_section['4f'] + +# interpolate for specific photon energy +print(np.interp(photon_energy, eph, cs) +~~~~~~ + +the data is loaded from the cross-sections.dat file which is a python-pickled data file. +to switch between data sources, use one of the load functions defined here +and dump the data to the cross-sections.dat file. + + +@author Matthias Muntwiler + +@copyright (c) 2020 by Paul Scherrer Institut @n +Licensed under the Apache License, Version 2.0 (the "License"); @n + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 +""" + +import numpy as np +from pathlib import Path +import periodictable as pt +import pickle +import urllib.request +import urllib.error +from . import bindingenergy + + +def load_kalha_data(): + """ + load all cross-sections from csv-files by Kalha et al. + + the files must be placed in the 'kalha' directory next to this file. + + @return: cross-section data in a nested dictionary, cf. load_pickled_data(). + """ + data = {} + p = Path(Path(__file__).parent, "kalha") + for entry in p.glob('*_*.csv'): + if entry.is_file(): + try: + element = int(entry.stem.split('_')[0]) + except ValueError: + pass + else: + data[element] = load_kalha_file(entry) + return data + + +def load_kalha_file(path): + """ + load the cross-sections of an element from a csv-file by Kalha et al. + + @param path: file path + @return: (dict) dictionary of 'nl' terms. + the data items are tuples (photon_energy, cross_sections) of 1-dimensional numpy arrays. + """ + a = np.genfromtxt(path, delimiter=',', names=True) + b = ~np.isnan(a['Photon_Energy__eV']) + a = a[b] + eph = a['Photon_Energy__eV'].copy() + data = {} + for n in range(1, 8): + for l in 'spdf': + col = f"{n}{l}" + try: + data[col] = (eph, a[col].copy()) + except ValueError: + pass + return data + + +def load_kalha_configuration(path): + """ + load the electron configuration from a csv-file by Kalha et al. + + @param path: file path + @return: (dict) dictionary of 'nl' terms mapping to number of electrons in the sub-shell. + """ + p = Path(path) + subshells = [] + electrons = [] + config = {} + with p.open() as f: + for l in f.readlines(): + s = l.split(',') + k_eph = "Photon Energy" + k_el = "#electrons" + if s[0][0:len(k_eph)] == k_eph: + subshells = s[1:] + elif s[0][0:len(k_el)] == k_el: + electrons = s[1:] + + for i, sh in enumerate(subshells): + if sh: + config[sh] = electrons[i] + + return config + + +def load_elettra_file(symbol, nl): + """ + download the cross sections of one level from the Elettra webelements web site. + + @param symbol: (str) element symbol + @param nl: (str) nl term, e.g. '2p' (no spin-orbit) + @return: (photon_energy, cross_section) tuple of 1-dimensional numpy arrays. + """ + url = f"https://vuo.elettra.eu/services/elements/data/{symbol.lower()}{nl}.txt" + try: + data = urllib.request.urlopen(url) + except urllib.error.HTTPError: + eph = None + cs = None + else: + a = np.genfromtxt(data) + try: + eph = a[:, 0] + cs = a[:, 1] + except IndexError: + eph = None + cs = None + + return eph, cs + + +def load_elettra_data(): + """ + download the cross sections from the Elettra webelements web site. + + @return: cross-section data in a nested dictionary, cf. load_pickled_data(). + """ + data = {} + for element in pt.elements: + element_data = {} + for nlj in element.binding_energy: + nl = nlj[0:2] + eb = element.binding_energy[nlj] + if nl not in element_data and eb <= 2000: + eph, cs = load_elettra_file(element.symbol, nl) + if eph is not None and cs is not None: + element_data[nl] = (eph, cs) + if len(element_data): + data[element.symbol] = element_data + + return data + + +def save_pickled_data(path, data): + """ + save a cross section data dictionary to a python-pickled file. + + @param path: file path + @param data: cross-section data in a nested dictionary, cf. load_pickled_data(). + @return: None + """ + with open(path, "wb") as f: + pickle.dump(data, f) + + +def load_pickled_data(path): + """ + load the cross section data from a python-pickled file. + + the file can be generated by the save_pickled_data() function. + + @param path: file path + @return: cross-section data in a nested dictionary. + the first-level keys are element symbols. + the second-level keys are 'nl' terms (e.g. '2p'). + note that the Yeh and Lindau tables do not resolve spin-orbit splitting. + the data items are (photon_energy, cross_sections) tuples + of 1-dimensional numpy arrays holding the data table. + cross section values are given in Mb. + """ + with open(path, "rb") as f: + data = pickle.load(f) + return data + + +class Photoionization(object): + def __init__(self): + self.cross_section = {} + self.cross_section_units = "Mb" + + +def init(table, reload=False): + """ + loads cross section data into the periodic table. + + this function is called by the periodictable to load the data on demand. + + @param table: + @param reload: + @return: + """ + if 'photoionization' in table.properties and not reload: + return + table.properties.append('photoionization') + + # default value + pt.core.Element.photoionization = Photoionization() + + p = Path(Path(__file__).parent, "cross-sections.dat") + data = load_pickled_data(p) + for el_key, el_data in data.items(): + try: + el = table[int(el_key)] + except ValueError: + el = table.symbol(el_key) + pi = Photoionization() + pi.cross_section = el_data + pi.cross_section_units = "Mb" + el.photoionization = pi diff --git a/pmsco/elements/spectrum.py b/pmsco/elements/spectrum.py new file mode 100644 index 0000000..32cd924 --- /dev/null +++ b/pmsco/elements/spectrum.py @@ -0,0 +1,198 @@ +""" +@package pmsco.elements.spectrum +photoelectron spectrum simulator + +this module calculates the basic structure of a photoelectron spectrum. +it calculates positions and approximate amplitude of elastic peaks +based on photon energy, binding energy, photoionization cross section, and stoichiometry. +escape depth, photon flux, analyser transmission are not accounted for. + + +usage +----- + +this module requires python 3.6, numpy, matplotlib and +the periodictable package (https://pypi.python.org/pypi/periodictable). + +~~~~~~{.py} +import numpy as np +import periodictable as pt +import pmsco.elements.spectrum as spec + +# for working with the data +labels, energy, intensity = spec.build_spectrum(800., {"Ti": 1, "O": 2}) + +# for plotting +spec.plot_spectrum(800., {"Ti": 1, "O": 2}) +~~~~~~ + + + +@author Matthias Muntwiler + +@copyright (c) 2020 by Paul Scherrer Institut @n +Licensed under the Apache License, Version 2.0 (the "License"); @n + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 +""" + +from matplotlib import pyplot as plt +import numpy as np +import periodictable as pt +from . import bindingenergy +from . import photoionization + + +def get_element(number_or_symbol): + """ + return the given Element object of the periodic table. + + @param number_or_symbol: atomic number (int) or chemical symbol (str). + @return: Element object. + """ + try: + el = pt.elements[number_or_symbol] + except KeyError: + el = pt.elements.symbol(number_or_symbol) + return el + + +def get_binding_energy(photon_energy, element, nlj): + """ + look up the binding energy of a core level and check whether it is smaller than the photon energy. + + @param photon_energy: photon energy in eV. + @param element: Element object of the periodic table. + @param nlj: (str) spectroscopic term, e.g. '4f7/2'. + @return: (float) binding energy or numpy.nan. + """ + try: + eb = element.binding_energy[nlj] + except KeyError: + return np.nan + if eb < photon_energy: + return eb + else: + return np.nan + + +def get_cross_section(photon_energy, element, nlj): + """ + look up the photoionization cross section. + + since the Yeh/Lindau tables do not resolve the spin-orbit splitting, + this function applies the normal relative weights of a full sub-shell. + + the result is a linear interpolation between tabulated values. + + @param photon_energy: photon energy in eV. + @param element: Element object of the periodic table. + @param nlj: (str) spectroscopic term, e.g. '4f7/2'. + @return: (float) cross section in Mb. + """ + nl = nlj[0:2] + try: + pet, cst = element.photoionization.cross_section[nl] + except KeyError: + return np.nan + + # weights of spin-orbit peaks + d_wso = {"p1/2": 1./3., + "p3/2": 2./3., + "d3/2": 2./5., + "d5/2": 3./5., + "f5/2": 3./7., + "f7/2": 4./7.} + wso = d_wso.get(nlj[1:], 1.) + cst = cst * wso + + # todo: consider spline + return np.interp(photon_energy, pet, cst) + + +def build_spectrum(photon_energy, elements, binding_energy=False, work_function=4.5): + """ + calculate the positions and amplitudes of core-level photoemission lines. + + the function looks up the binding energies and cross sections of all photoemission lines in the energy range + given by the photon energy and returns an array of expected spectral lines. + + @param photon_energy: (numeric) photon energy in eV. + @param elements: list or dictionary of elements. + elements are identified by their atomic number (int) or chemical symbol (str). + if a dictionary is given, the (float) values are stoichiometric weights of the elements. + @param binding_energy: (bool) return binding energies (True) rather than kinetic energies (False, default). + @param work_function: (float) work function of the instrument in eV. + @return: tuple (labels, positions, intensities) of 1-dimensional numpy arrays representing the spectrum. + labels are in the format {Symbol}{n}{l}{j}. + """ + ekin = [] + ebind = [] + intens = [] + labels = [] + + for element in elements: + el = get_element(element) + for n in range(1, 8): + for l in "spdf": + for j in ['', '1/2', '3/2', '5/2', '7/2']: + nlj = f"{n}{l}{j}" + eb = get_binding_energy(photon_energy, el, nlj) + cs = get_cross_section(photon_energy, el, nlj) + try: + cs = cs * elements[element] + except (KeyError, TypeError): + pass + if not np.isnan(eb) and not np.isnan(cs): + ekin.append(photon_energy - eb - work_function) + ebind.append(eb) + intens.append(cs) + labels.append(f"{el.symbol}{nlj}") + + ebind = np.array(ebind) + ekin = np.array(ekin) + intens = np.array(intens) + labels = np.array(labels) + + if binding_energy: + return labels, ebind, intens + else: + return labels, ekin, intens + + +def plot_spectrum(photon_energy, elements, binding_energy=False, work_function=4.5, show_labels=True): + """ + plot a simple spectrum representation of a material. + + the function looks up the binding energies and cross sections of all photoemission lines in the energy range + given by the photon energy and returns an array of expected spectral lines. + + the spectrum is plotted using matplotlib.pyplot.stem. + + @param photon_energy: (numeric) photon energy in eV. + @param elements: list or dictionary of elements. + elements are identified by their atomic number (int) or chemical symbol (str). + if a dictionary is given, the (float) values are stoichiometric weights of the elements. + @param binding_energy: (bool) return binding energies (True) rather than kinetic energies (False, default). + @param work_function: (float) work function of the instrument in eV. + @param show_labels: (bool) show peak labels (True, default) or not (False). + @return: (figure, axes) + """ + labels, energy, intensity = build_spectrum(photon_energy, elements, binding_energy=binding_energy, + work_function=work_function) + + fig, ax = plt.subplots() + ax.stem(energy, intensity, basefmt=' ', use_line_collection=True) + if show_labels: + for sxy in zip(labels, energy, intensity): + ax.annotate(sxy[0], xy=(sxy[1], sxy[2]), textcoords='data') + + ax.grid() + if binding_energy: + ax.set_xlabel('binding energy') + else: + ax.set_xlabel('kinetic energy') + ax.set_ylabel('intensity') + ax.set_title(elements) + return fig, ax diff --git a/pmsco/files.py b/pmsco/files.py index 25ede36..08609c5 100644 --- a/pmsco/files.py +++ b/pmsco/files.py @@ -27,20 +27,20 @@ logger = logging.getLogger(__name__) # # each string of this set marks a category of files. # -# @arg @c 'input' : raw input files for calculator, including cluster and atomic files in custom format -# @arg @c 'output' : raw output files from calculator -# @arg @c 'atomic' : atomic scattering (phase, emission) files in portable format -# @arg @c 'cluster' : cluster files in portable XYZ format for report -# @arg @c 'log' : log files -# @arg @c 'debug' : debug files -# @arg @c 'model': output files in ETPAI format: complete simulation (a_-1_-1_-1_-1) -# @arg @c 'scan' : output files in ETPAI format: scan (a_b_-1_-1_-1) -# @arg @c 'symmetry' : output files in ETPAI format: symmetry (a_b_c_-1_-1) -# @arg @c 'emitter' : output files in ETPAI format: emitter (a_b_c_d_-1) -# @arg @c 'region' : output files in ETPAI format: region (a_b_c_d_e) -# @arg @c 'report': final report of results -# @arg @c 'population': final state of particle population -# @arg @c 'rfac': files related to models which give bad r-factors (dynamic category, see below). +# @arg 'input' : raw input files for calculator, including cluster and atomic files in custom format +# @arg 'output' : raw output files from calculator +# @arg 'atomic' : atomic scattering (phase, emission) files in portable format +# @arg 'cluster' : cluster files in portable XYZ format for report +# @arg 'log' : log files +# @arg 'debug' : debug files +# @arg 'model': output files in ETPAI format: complete simulation (a_-1_-1_-1_-1) +# @arg 'scan' : output files in ETPAI format: scan (a_b_-1_-1_-1) +# @arg 'domain' : output files in ETPAI format: domain (a_b_c_-1_-1) +# @arg 'emitter' : output files in ETPAI format: emitter (a_b_c_d_-1) +# @arg 'region' : output files in ETPAI format: region (a_b_c_d_e) +# @arg 'report': final report of results +# @arg 'population': final state of particle population +# @arg 'rfac': files related to models which give bad r-factors (dynamic category, see below). # # @note @c 'rfac' is a dynamic category not connected to a particular file or content type. # no file should be marked @c 'rfac'. @@ -48,7 +48,7 @@ logger = logging.getLogger(__name__) # if so, all files related to bad models are deleted, regardless of their static category. # FILE_CATEGORIES = {'cluster', 'atomic', 'input', 'output', - 'report', 'region', 'emitter', 'scan', 'symmetry', 'model', + 'report', 'region', 'emitter', 'scan', 'domain', 'model', 'log', 'debug', 'population', 'rfac'} ## @var FILE_CATEGORIES_TO_KEEP @@ -242,37 +242,52 @@ class FileTracker(object): else: self._complete_models.discard(model) - def delete_files(self, categories=None): + def delete_files(self, categories=None, incomplete_models=False): """ - delete the files matching the list of categories. + delete all files matching a set of categories. - @version this method does not act on the 'rfac' category. + this function deletes all files that are tagged with one of the given categories. + tags are set by the code sections that create the files. + for a list of common categories, see FILE_CATEGORIES. + the categories can be given as an argument or taken from the categories_to_delete property. + + files are deleted regardless of R-factor. + be sure to specify only categories that you don't need in the output at all. + + by default, only files of complete models (cf. set_model_complete()) are deleted + to avoid interference with running calculations. + to clean up after calculations, the incomplete_models argument can override this. + + @note this method does not act on the special 'rfac' category (see delete_bad_rfac()). @param categories: set of file categories to delete. - defaults to self.categories_to_delete. + if the argument is None, it defaults to the categories_to_delete property. + + @param incomplete_models: (bool) delete files of incomplete models as well. + by default (False), incomplete models are not deleted. @return: None """ if categories is None: categories = self.categories_to_delete for cat in categories: - self.delete_category(cat) + self.delete_category(cat, incomplete_models=incomplete_models) def delete_bad_rfac(self, keep=0, force_delete=False): """ - delete the files of all models except a specified number of good models. + delete all files of all models except for a specified number of best ranking models. the method first determines which models to keep. - models with R factor values of 0.0, without a specified R-factor, and the specified number of best ranking non-zero models are kept. - the files belonging to the keeper models are kept, all others are deleted, - regardless of category. - files of incomplete models are also kept. + in addition, incomplete models, models with R factor = 0.0, + and those without a specified R-factor are kept. + all other files are deleted. + the method does not consider the file category. the files are deleted from the list and the file system. - files are deleted only if 'rfac' is specified in self.categories_to_delete - or if force_delete is set to True. + the method executes only if 'rfac' is specified in self.categories_to_delete + or if force_delete is True. otherwise the method does nothing. @param keep: number of files to keep. @@ -330,18 +345,32 @@ class FileTracker(object): return len(del_models) - def delete_category(self, category): + def delete_category(self, category, incomplete_models=False): """ delete all files of a specified category from the list and the file system. - only files of complete models (cf. set_model_complete()) are deleted, but regardless of R-factor. + this function deletes all files that are tagged with the given category. + tags are set by the code sections that create the files. + for a list of common categories, see FILE_CATEGORIES. + + files are deleted regardless of R-factor. + be sure to specify only categories that you don't need in the output at all. + + by default, only files of complete models (cf. set_model_complete()) are deleted + to avoid interference with running calculations. + to clean up after calculations, the incomplete_models argument can override this. @param category: (str) category. + should be one of FILE_CATEGORIES. otherwise, the function has no effect. + + @param incomplete_models: (bool) delete files of incomplete models as well. + by default (False), incomplete models are not deleted. @return: None """ del_names = {name for (name, cat) in self._file_category.items() if cat == category} - del_names &= {name for (name, model) in self._file_model.items() if model in self._complete_models} + if not incomplete_models: + del_names &= {name for (name, model) in self._file_model.items() if model in self._complete_models} for name in del_names: self.delete_file(name) @@ -375,3 +404,33 @@ class FileTracker(object): logger.warning("file system error deleting file {0}".format(path)) else: logger.debug("delete file {0} ({1}, model {2})".format(path, cat, model)) + + +def list_files_other_models(prefix, models): + """ + list input/output files except those of the given models. + + this can be used to clean up all files except those belonging to the given models. + + to delete the listed files: + + for f in files: + os.remove(f) + + @param prefix: file name prefix up to the first underscore. + only files starting with this prefix are listed. + + @param models: sequence or set of model numbers that should not be listed. + + @return: set of file names + """ + file_names = set([]) + for entry in os.scandir(): + if entry.is_file: + elements = entry.name.split('_') + try: + if len(elements) == 6 and elements[0] == prefix and int(elements[1]) not in models: + file_names.add(entry.name) + except (IndexError, ValueError): + pass + return file_names diff --git a/pmsco/graphics/rfactor.py b/pmsco/graphics/rfactor.py index 7b01c05..99476bd 100644 --- a/pmsco/graphics/rfactor.py +++ b/pmsco/graphics/rfactor.py @@ -182,7 +182,7 @@ def render_results(results_file, data=None): """ if data is None: - data = np.genfromtxt(results_file, names=True) + data = np.atleast_1d(np.genfromtxt(results_file, names=True)) summary = evaluate_results(data) diff --git a/pmsco/handlers.py b/pmsco/handlers.py index e9e26c4..6b096ab 100644 --- a/pmsco/handlers.py +++ b/pmsco/handlers.py @@ -1,6 +1,6 @@ """ @package pmsco.handlers -project-independent task handlers for models, scans, symmetries, emitters and energies. +project-independent task handlers for models, scans, domains, emitters and energies. calculation tasks are organized in a hierarchical tree. at each node, a task handler (feel free to find a better name) @@ -20,9 +20,9 @@ the handlers of the structural optimizers are declared in separate modules. scans are defined by the project. the actual merging step from multiple scans into one result dataset is delegated to the project class. -symmetry handlers split a task into one child per symmetry. -symmetries are defined by the project. -the actual merging step from multiple symmetries into one result dataset is delegated to the project class. +domain handlers split a task into one child per domain. +domains are defined by the project. +the actual merging step from multiple domains into one result dataset is delegated to the project class. emitter handlers split a task into one child per emitter configuration (inequivalent sets of emitting atoms). emitter configurations are defined by the project. @@ -35,8 +35,8 @@ code inspection and tests have shown that per-emitter results from EDAC can be s in order to take advantage of parallel processing. while several classes of model handlers are available, -the default handlers for scans, symmetries, emitters and energies should be sufficient in most situations. -the scan and symmetry handlers call methods of the project class to invoke project-specific functionality. +the default handlers for scans, domains, emitters and energies should be sufficient in most situations. +the scan and domain handlers call methods of the project class to invoke project-specific functionality. @author Matthias Muntwiler, matthias.muntwiler@psi.ch @@ -60,6 +60,7 @@ import os from pmsco.compat import open import pmsco.data as md +import pmsco.dispatch as dispatch import pmsco.graphics.scan as mgs from pmsco.helpers import BraceMessage as BMsg @@ -127,10 +128,14 @@ class TaskHandler(object): for best efficiency the number of tasks generated should be greater or equal the number of slots. it should not exceed N times the number of slots, where N is a reasonably small number. - @return None + @return (int) number of children that create_tasks() will generate on average. + the number does not need to be accurate, a rough estimate or order of magnitude if greater than 10 is fine. + it is used to distribute processing slots across task levels. + see pmsco.dispatch.MscoMaster.setup(). """ self._project = project self._slots = slots + return 1 def cleanup(self): """ @@ -416,6 +421,8 @@ class ScanHandler(TaskHandler): def setup(self, project, slots): """ initialize the scan task handler and save processed experimental scans. + + @return (int) number of scans defined in the project. """ super(ScanHandler, self).setup(project, slots) @@ -437,6 +444,8 @@ class ScanHandler(TaskHandler): filename = project.output_file + ".modf" + ext md.save_data(filename, project.combined_modf) + return len(self._project.scans) + def create_tasks(self, parent_task): """ generate a calculation task for each scan of the given parent task. @@ -526,7 +535,7 @@ class ScanHandler(TaskHandler): return None -class SymmetryHandler(TaskHandler): +class DomainHandler(TaskHandler): ## @var _pending_ids_per_parent # (dict) sets of child task IDs per parent # @@ -546,20 +555,29 @@ class SymmetryHandler(TaskHandler): # the values are sets of all child CalculationTask.id belonging to the parent. def __init__(self): - super(SymmetryHandler, self).__init__() + super(DomainHandler, self).__init__() self._pending_ids_per_parent = {} self._complete_ids_per_parent = {} + def setup(self, project, slots): + """ + initialize the domain task handler. + + @return (int) number of domains defined in the project. + """ + super(DomainHandler, self).setup(project, slots) + return len(self._project.domains) + def create_tasks(self, parent_task): """ - generate a calculation task for each symmetry of the given parent task. + generate a calculation task for each domain of the given parent task. - all symmetries share the same model parameters. + all domains share the same model parameters. - @return list of CalculationTask objects, with one element per symmetry. - the symmetry index varies according to project.symmetries. + @return list of CalculationTask objects, with one element per domain. + the domain index varies according to project.domains. """ - super(SymmetryHandler, self).create_tasks(parent_task) + super(DomainHandler, self).create_tasks(parent_task) parent_id = parent_task.id self._parent_tasks[parent_id] = parent_task @@ -567,10 +585,10 @@ class SymmetryHandler(TaskHandler): self._complete_ids_per_parent[parent_id] = set() out_tasks = [] - for (i_sym, sym) in enumerate(self._project.symmetries): + for (i_dom, domain) in enumerate(self._project.domains): new_task = parent_task.copy() new_task.parent_id = parent_id - new_task.change_id(sym=i_sym) + new_task.change_id(domain=i_dom) child_id = new_task.id self._pending_tasks[child_id] = new_task @@ -579,25 +597,25 @@ class SymmetryHandler(TaskHandler): out_tasks.append(new_task) if not out_tasks: - logger.error("no symmetry tasks generated. your project must declare at least one symmetry.") + logger.error("no domain tasks generated. your project must declare at least one domain.") return out_tasks def add_result(self, task): """ - collect and combine the calculation results versus symmetry. + collect and combine the calculation results versus domain. * mark the task as complete * store its result for later * check whether this was the last pending task of the family (belonging to the same parent). - the actual merging of data is delegated to the project's combine_symmetries() method. + the actual merging of data is delegated to the project's combine_domains() method. @param task: (CalculationTask) calculation task that completed. @return parent task (CalculationTask) if the family is complete. None if the family is not complete yet. """ - super(SymmetryHandler, self).add_result(task) + super(DomainHandler, self).add_result(task) self._complete_tasks[task.id] = task del self._pending_tasks[task.id] @@ -607,7 +625,7 @@ class SymmetryHandler(TaskHandler): family_pending.remove(task.id) family_complete.add(task.id) - # all symmetries complete? + # all domains complete? if len(family_pending) == 0: parent_task = self._parent_tasks[task.parent_id] @@ -624,7 +642,7 @@ class SymmetryHandler(TaskHandler): parent_task.time = reduce(lambda a, b: a + b, child_times) if parent_task.result_valid: - self._project.combine_symmetries(parent_task, child_tasks) + self._project.combine_domains(parent_task, child_tasks) self._project.evaluate_result(parent_task, child_tasks) self._project.files.add_file(parent_task.result_filename, parent_task.id.model, 'scan') self._project.files.add_file(parent_task.modf_filename, parent_task.id.model, 'scan') @@ -669,6 +687,19 @@ class EmitterHandler(TaskHandler): self._pending_ids_per_parent = {} self._complete_ids_per_parent = {} + def setup(self, project, slots): + """ + initialize the emitter task handler. + + @return (int) estimated number of emitter configurations that the cluster generator will generate. + the estimate is based on the start parameters, scan 0 and domain 0. + """ + super(EmitterHandler, self).setup(project, slots) + mock_model = self._project.create_model_space().start + mock_index = dispatch.CalcID(-1, 0, 0, -1, -1) + n_emitters = project.cluster_generator.count_emitters(mock_model, mock_index) + return n_emitters + def create_tasks(self, parent_task): """ generate a calculation task for each emitter configuration of the given parent task. @@ -750,11 +781,11 @@ class EmitterHandler(TaskHandler): if parent_task.result_valid: self._project.combine_emitters(parent_task, child_tasks) self._project.evaluate_result(parent_task, child_tasks) - self._project.files.add_file(parent_task.result_filename, parent_task.id.model, 'symmetry') - self._project.files.add_file(parent_task.modf_filename, parent_task.id.model, 'symmetry') + self._project.files.add_file(parent_task.result_filename, parent_task.id.model, 'domain') + self._project.files.add_file(parent_task.modf_filename, parent_task.id.model, 'domain') graph_file = mgs.render_scan(parent_task.modf_filename, ref_data=self._project.scans[parent_task.id.scan].modulation) - self._project.files.add_file(graph_file, parent_task.id.model, 'symmetry') + self._project.files.add_file(graph_file, parent_task.id.model, 'domain') del self._pending_ids_per_parent[parent_task.id] del self._complete_ids_per_parent[parent_task.id] @@ -921,7 +952,7 @@ class EnergyRegionHandler(RegionHandler): @param slots (int) number of calculation slots (processes). - @return None + @return (int) average number of child tasks """ super(EnergyRegionHandler, self).setup(project, slots) @@ -934,6 +965,8 @@ class EnergyRegionHandler(RegionHandler): logger.debug(BMsg("region handler: split scan {file} into {slots} chunks", file=os.path.basename(scan.filename), slots=self._slots_per_scan[i])) + return max(int(sum(self._slots_per_scan) / len(self._slots_per_scan)), 1) + def create_tasks(self, parent_task): """ generate a calculation task for each energy region of the given parent task. diff --git a/pmsco/makefile b/pmsco/makefile index bb88a97..3f18d64 100644 --- a/pmsco/makefile +++ b/pmsco/makefile @@ -12,7 +12,7 @@ MUFPOT_DIR = mufpot LOESS_DIR = loess PHAGEN_DIR = calculators/phagen -all: edac loess +all: edac loess phagen edac: $(MAKE) -C $(EDAC_DIR) diff --git a/pmsco/optimizers/genetic.py b/pmsco/optimizers/genetic.py index 067b3ae..c7ea0ec 100644 --- a/pmsco/optimizers/genetic.py +++ b/pmsco/optimizers/genetic.py @@ -45,7 +45,7 @@ class GeneticPopulation(population.Population): 4. mutation: a gene may mutate at random. 5. selection: the globally best individual is added to a parent population (and replaces the worst). - the main tuning parameter of the algorithm is the mutation_step which is copied from the domain.step. + the main tuning parameter of the algorithm is the mutation_step which is copied from the model_space.step. it defines the width of a gaussian distribution of change under a weak mutation. it should be large enough so that the whole parameter space can be probed, but small enough that a frequent mutation does not throw the individual out of the convergence region. @@ -92,9 +92,9 @@ class GeneticPopulation(population.Population): ## @var mutation_step # # standard deviations of the exponential distribution function used in the mutate_weak() method. - # the variable is a dictionary with the same keys as model_step (the parameter domain). + # the variable is a dictionary with the same keys as model_step (the parameter space). # - # it is initialized from the domain.step + # it is initialized from the model_space.step # or set to a default value based on the parameter range and population size. def __init__(self): @@ -110,15 +110,15 @@ class GeneticPopulation(population.Population): self.position_constrain_mode = 'random' self.mutation_step = {} - def setup(self, size, domain, **kwargs): + def setup(self, size, model_space, **kwargs): """ @copydoc Population.setup() in addition to the inherited behaviour, this method initializes self.mutation_step. - mutation_step of a parameter is set to its domain.step if non-zero. + mutation_step of a parameter is set to its model_space.step if non-zero. otherwise it is set to the parameter range divided by the population size. """ - super(GeneticPopulation, self).setup(size, domain, **kwargs) + super(GeneticPopulation, self).setup(size, model_space, **kwargs) for key in self.model_step: val = self.model_step[key] @@ -131,7 +131,7 @@ class GeneticPopulation(population.Population): this implementation is a new proposal. the distribution is not completely random. rather, a position vector (by parameter) is initialized with a linear function - that covers the parameter domain. + that covers the parameter space. the linear function is then permuted randomly. the method does not update the particle info fields. @@ -243,7 +243,7 @@ class GeneticPopulation(population.Population): """ apply a weak mutation to a model. - each parameter is changed to a different value in the domain of the parameter at the given probability. + each parameter is changed to a different value in the parameter space at the given probability. the amount of change has a gaussian distribution with a standard deviation of mutation_step. @param[in,out] model: structured numpy.ndarray holding the model parameters. @@ -263,7 +263,7 @@ class GeneticPopulation(population.Population): """ apply a strong mutation to a model. - each parameter is changed to a random value in the domain of the parameter at the given probability. + each parameter is changed to a random value in the parameter space at the given probability. @param[in,out] model: structured numpy.ndarray holding the model parameters. model is modified in place. diff --git a/pmsco/optimizers/gradient.py b/pmsco/optimizers/gradient.py index 6a72fe8..07b34c3 100644 --- a/pmsco/optimizers/gradient.py +++ b/pmsco/optimizers/gradient.py @@ -8,7 +8,7 @@ the optimization task is distributed over multiple processes using MPI. the optimization must be started with N+1 processes in the MPI environment, where N equals the number of fit parameters. -IMPLEMENTATION IN PROGRESS - DEBUGGING +THIS MODULE IS NOT INTEGRATED INTO PMSCO YET. Requires: scipy, numpy @@ -109,7 +109,7 @@ class MscMaster(MscProcess): def setup(self, project): super(MscMaster, self).setup(project) - self.dom = project.create_domain() + self.dom = project.create_model_space() self.running_slaves = self.slaves self._outfile = open(self.project.output_file + ".dat", "w") diff --git a/pmsco/optimizers/grid.py b/pmsco/optimizers/grid.py index aed7b9b..388929e 100644 --- a/pmsco/optimizers/grid.py +++ b/pmsco/optimizers/grid.py @@ -63,7 +63,7 @@ class GridPopulation(object): ## @var positions # (numpy.ndarray) flat list of grid coordinates and results. # - # the column names include the names of the model parameters, taken from domain.start, + # the column names include the names of the model parameters, taken from model_space.start, # and the special names @c '_model', @c '_rfac'. # the special fields have the following meanings: # @@ -113,11 +113,12 @@ class GridPopulation(object): dt.sort(key=lambda t: t[0].lower()) return dt - def setup(self, domain): + def setup(self, model_space): """ set up the population and result arrays. - @param domain: definition of initial and limiting model parameters + @param model_space: (pmsco.project.ModelSpace) + definition of initial and limiting model parameters expected by the cluster and parameters functions. the attributes have the following meanings: @arg start: values of the fixed parameters. @@ -128,24 +129,24 @@ class GridPopulation(object): if step <= 0, the parameter is kept constant. """ - self.model_start = domain.start - self.model_min = domain.min - self.model_max = domain.max - self.model_step = domain.step + self.model_start = model_space.start + self.model_min = model_space.min + self.model_max = model_space.max + self.model_step = model_space.step self.model_count = 1 self.search_keys = [] self.fixed_keys = [] scales = [] - for p in domain.step.keys(): - if domain.step[p] > 0: - n = np.round((domain.max[p] - domain.min[p]) / domain.step[p]) + 1 + for p in model_space.step.keys(): + if model_space.step[p] > 0: + n = int(np.round((model_space.max[p] - model_space.min[p]) / model_space.step[p]) + 1) else: n = 1 if n > 1: self.search_keys.append(p) - scales.append(np.linspace(domain.min[p], domain.max[p], n)) + scales.append(np.linspace(model_space.min[p], model_space.max[p], n)) else: self.fixed_keys.append(p) @@ -221,7 +222,7 @@ class GridPopulation(object): @raise AssertionError if the number of rows of the two files differ. """ - data = np.genfromtxt(filename, names=True) + data = np.atleast_1d(np.genfromtxt(filename, names=True)) assert data.shape == array.shape for name in data.dtype.names: array[name] = data[name] @@ -298,12 +299,12 @@ class GridSearchHandler(handlers.ModelHandler): the minimum number of slots is 1, the recommended value is 10 or greater. the population size is set to at least 4. - @return: + @return (int) number of models to be calculated. """ super(GridSearchHandler, self).setup(project, slots) self._pop = GridPopulation() - self._pop.setup(self._project.create_domain()) + self._pop.setup(self._project.create_model_space()) self._invalid_limit = max(slots, self._invalid_limit) self._outfile = open(self._project.output_file + ".dat", "w") @@ -311,7 +312,7 @@ class GridSearchHandler(handlers.ModelHandler): self._outfile.write(" ".join(self._pop.positions.dtype.names)) self._outfile.write("\n") - return None + return self._pop.model_count def cleanup(self): self._outfile.close() diff --git a/pmsco/optimizers/population.py b/pmsco/optimizers/population.py index 8129556..f875341 100644 --- a/pmsco/optimizers/population.py +++ b/pmsco/optimizers/population.py @@ -3,7 +3,7 @@ base classes for population-based optimizers. a _population_ is a set of individuals or particles -that can assume coordinates from the parameter domain. +that can assume coordinates from the parameter space. a tuple of coordinates is also called _model parameters_ which define the _model_. the individuals travel through parameter space according to an algorithm defined separately. depending on the algorithm, the population can converge towards the optimum coordinates based on calculated R-factors. @@ -117,7 +117,7 @@ class Population(object): ## @var pos # (numpy.ndarray) current positions of each particle. # - # the column names include the names of the model parameters, taken from domain.start, + # the column names include the names of the model parameters, taken from model_space.start, # and the special names @c '_particle', @c '_model', @c '_rfac'. # the special fields have the following meanings: # @@ -299,7 +299,7 @@ class Population(object): arr[k] = model_dict[k] return arr - def setup(self, size, domain, **kwargs): + def setup(self, size, model_space, **kwargs): """ set up the population arrays seeded with previous results and the start model. @@ -315,12 +315,12 @@ class Population(object): @param size: requested number of particles. - @param domain: definition of initial and limiting model parameters + @param model_space: definition of initial and limiting model parameters expected by the cluster and parameters functions. - @arg domain.start: initial guess. - @arg domain.min: minimum values allowed. - @arg domain.max: maximum values allowed. if min == max, the parameter is kept constant. - @arg domain.step: depends on the actual algorithm. + @arg model_space.start: initial guess. + @arg model_space.min: minimum values allowed. + @arg model_space.max: maximum values allowed. if min == max, the parameter is kept constant. + @arg model_space.step: depends on the actual algorithm. not used in particle swarm. standard deviation of mutations in genetic optimization. @@ -335,14 +335,14 @@ class Population(object): """ self.size_req = size self.size_act = size - self.model_start = domain.start - self.model_min = domain.min - self.model_max = domain.max - self.model_step = domain.step - self.model_start_array = self.get_model_array(domain.start) - self.model_min_array = self.get_model_array(domain.min) - self.model_max_array = self.get_model_array(domain.max) - self.model_step_array = self.get_model_array(domain.step) + self.model_start = model_space.start + self.model_min = model_space.min + self.model_max = model_space.max + self.model_step = model_space.step + self.model_start_array = self.get_model_array(model_space.start) + self.model_min_array = self.get_model_array(model_space.min) + self.model_max_array = self.get_model_array(model_space.max) + self.model_step_array = self.get_model_array(model_space.step) # allocate arrays dt = self.get_pop_dtype(self.model_start) @@ -378,8 +378,8 @@ class Population(object): """ initializes a random population. - the position array is filled with random values (uniform distribution) from the parameter domain. - velocity values are randomly chosen between -1/8 to 1/8 times the width (max - min) of the parameter domain. + the position array is filled with random values (uniform distribution) from the parameter space. + velocity values are randomly chosen between -1/8 to 1/8 times the width (max - min) of the parameter space. the method does not update the particle info fields. @@ -402,8 +402,8 @@ class Population(object): the method does not update the particle info fields. @param params: dictionary of model parameters. - the keys must match the ones of domain.start. - values that lie outside of the domain are skipped. + the keys must match the ones of model_space.start. + values that lie outside of the model space are skipped. @param index: index of the particle that is seeded. the index must be in the allowed range of the self.pos array. @@ -440,7 +440,7 @@ class Population(object): this method is called as a part of setup(). it must not be called after the optimization has started. - parameter values that lie outside the parameter domain (min/max) are left at their previous value. + parameter values that lie outside the model space (min/max) are left at their previous value. @note this method does not initialize the remaining particles. neither does it set the velocity and best position arrays of the seeded particles. @@ -488,7 +488,7 @@ class Population(object): count_limit = self.pos.shape[0] count_limit = min(count_limit, self.pos.shape[0] - first_particle) - seed = np.genfromtxt(seed_file, names=True) + seed = np.atleast_1d(np.genfromtxt(seed_file, names=True)) try: seed = seed[seed['_rfac'] <= rfac_limit] except ValueError: @@ -561,7 +561,7 @@ class Population(object): this method does not handle exceptions coming from numpy.genfromtxt such as missing file (IOError) or conversion errors (ValueError). exception handling should be done by the owner of the population (typically the model handler). - patch values that lie outside the population domain aresilently ignored. + patch values that lie outside the model space are silently ignored. @param patch_file: path and name of the patch file. the file must have the correct format for load_array(), @@ -572,7 +572,7 @@ class Population(object): @raise ValueError for conversion errors. """ - self.pos_patch = np.genfromtxt(patch_file, names=True) + self.pos_patch = np.atleast_1d(np.genfromtxt(patch_file, names=True)) source_fields = set(self.pos_patch.dtype.names) dest_fields = set(self.model_start.keys()) common_fields = source_fields & dest_fields @@ -592,7 +592,7 @@ class Population(object): the method overwrites only parameter values, not control variables. _particle indices that lie outside the range of available population items are ignored. - parameter values that lie outside the parameter domain (min/max) are ignored. + parameter values that lie outside the model space (min/max) are ignored. """ if self.pos_patch is not None: logger.warning(BMsg("patching generation {gen} with new positions.", gen=self.generation)) @@ -658,7 +658,7 @@ class Population(object): elif isinstance(source, str): for i in range(timeout): try: - array = np.genfromtxt(source, names=True) + array = np.atleast_1d(np.genfromtxt(source, names=True)) except IOError: time.sleep(1) else: @@ -708,7 +708,7 @@ class Population(object): the method also performs a range check. the parameter values are constrained according to self.position_constrain_mode - and the parameter domain self.model_min and self.model_max. + and the model space self.model_min and self.model_max. if the constrain mode is `error`, models that violate the constraints are ignored and removed from the import queue. @@ -844,18 +844,18 @@ class Population(object): """ constrain a position to the given bounds. - this method resolves violations of parameter boundaries, i.e. when a particle is leaving the designated domain. - if a violation is detected, the method calculates an updated position inside the domain + this method resolves violations of parameter boundaries, i.e. when a particle is leaving the designated model space. + if a violation is detected, the method calculates an updated position inside the model space according to the selected algorithm. in some cases the velocity or boundaries have to be updated as well. the method distinguishes overshoot and undershoot violations. - overshoot is the normal case when the particle is leaving the domain. + overshoot is the normal case when the particle is leaving the model space. it is handled according to the selected algorithm. undershoot is a special case where the particle was outside the boundaries before the move. this case can occur in the beginning if the population is seeded with out-of-bounds values. - undershoot is always handled by placing the particle at a random position in the domain + undershoot is always handled by placing the particle at a random position in the model space regardless of the chosen constraint mode. @note it is important to avoid bias while handling constraint violations. @@ -877,7 +877,7 @@ class Population(object): @param _mode: what to do if a boundary constraint is violated: @arg 're-enter': re-enter from the opposite side of the parameter interval. - @arg 'bounce': fold the motion vector at the boundary and move the particle back into the domain. + @arg 'bounce': fold the motion vector at the boundary and move the particle back into the model space. @arg 'scatter': place the particle at a random place between its old position and the violated boundary. @arg 'stick': place the particle at the violated boundary. @arg 'expand': move the boundary so that the particle fits. @@ -982,7 +982,7 @@ class Population(object): @param search_array: population-like numpy structured array to search for the model. defaults to self.results if None. - @param precision: precision relative to model domain at which elements should be considered equal. + @param precision: precision relative to model space at which elements should be considered equal. @return index of the first occurrence. @@ -1071,7 +1071,7 @@ class Population(object): @raise AssertionError if the number of rows of the two files differ. """ - data = np.genfromtxt(filename, names=True) + data = np.atleast_1d(np.genfromtxt(filename, names=True)) assert data.shape == array.shape for name in data.dtype.names: array[name] = data[name] @@ -1182,7 +1182,7 @@ class PopulationHandler(handlers.ModelHandler): which may slow down convergence. if calculations take a long time compared to the available computation time - or spawn a lot of sub-tasks due to complex symmetry, + or spawn a lot of sub-tasks due to complex model space, and you prefer to allow for a good number of generations, you should override the population size. @@ -1190,7 +1190,7 @@ class PopulationHandler(handlers.ModelHandler): @param slots: number of calculation processes available through MPI. - @return: None + @return (int) population size """ super(PopulationHandler, self).setup(project, slots) @@ -1206,10 +1206,10 @@ class PopulationHandler(handlers.ModelHandler): outfile.write(" ".join(self._pop.results.dtype.names)) outfile.write("\n") - return None + return self._pop_size def setup_population(self): - self._pop.setup(self._pop_size, self._project.create_domain(), **self._project.optimizer_params) + self._pop.setup(self._pop_size, self._project.create_model_space(), **self._project.optimizer_params) def cleanup(self): super(PopulationHandler, self).cleanup() @@ -1235,7 +1235,7 @@ class PopulationHandler(handlers.ModelHandler): the effect can be reduced by making the population larger than the number of processes. the created tasks are returned as the function result and added to self._pending_tasks. - + @return list of generated tasks. empty list if the optimization has converged (see Population.is_converged()) or if the time limit is approaching. diff --git a/pmsco/optimizers/table.py b/pmsco/optimizers/table.py index ffedfa8..b9e7171 100644 --- a/pmsco/optimizers/table.py +++ b/pmsco/optimizers/table.py @@ -83,8 +83,8 @@ class TablePopulation(population.Population): although, a seed file is accepted, it is not used. patching is allowed, but there is normally no advantage over modifying the table. - the domain is used to define the model parameters and the parameter range. - models violating the parameter domain are ignored. + the model space is used to define the model parameters and the parameter range. + models violating the parameter model space are ignored. """ ## @var table_source @@ -103,20 +103,20 @@ class TablePopulation(population.Population): self.table_source = None self.position_constrain_mode = 'error' - def setup(self, size, domain, **kwargs): + def setup(self, size, model_space, **kwargs): """ - set up the population arrays, parameter domain and data source. + set up the population arrays, parameter model space and data source. @param size: requested number of particles. this does not need to correspond to the number of table entries. on each generation the population loads up to this number of new entries from the table source. - @param domain: definition of initial and limiting model parameters + @param model_space: definition of initial and limiting model parameters expected by the cluster and parameters functions. - @arg domain.start: not used. - @arg domain.min: minimum values allowed. - @arg domain.max: maximum values allowed. - @arg domain.step: not used. + @arg model_space.start: not used. + @arg model_space.min: minimum values allowed. + @arg model_space.max: maximum values allowed. + @arg model_space.step: not used. the following arguments are keyword arguments. the method also accepts the inherited arguments for seeding. they do not have an effect, however. @@ -128,7 +128,7 @@ class TablePopulation(population.Population): @return: None """ - super(TablePopulation, self).setup(size, domain, **kwargs) + super(TablePopulation, self).setup(size, model_space, **kwargs) self.table_source = kwargs['table_source'] def advance_population(self): diff --git a/pmsco/pmsco.py b/pmsco/pmsco.py index 2736a68..c05a08a 100755 --- a/pmsco/pmsco.py +++ b/pmsco/pmsco.py @@ -131,6 +131,8 @@ def set_common_args(project, args): if args.output_file: project.set_output(args.output_file) log_file = args.output_file + ".log" + if args.db_file: + project.db_file = args.db_file if args.log_file: log_file = args.log_file setup_logging(enable=args.log_enable, filename=log_file, level=args.log_level) @@ -246,6 +248,7 @@ class Args(object): self.seed_limit = 0 self.data_dir = "" self.output_file = output_file + self.db_file = "" self.time_limit = 24.0 self.keep_files = files.FILE_CATEGORIES_TO_KEEP self.keep_best = 10 @@ -272,7 +275,7 @@ def get_cli_parser(default_args=None): 1) a project class derived from pmsco.project.Project. the class implements/overrides all necessary methods of the calculation project, - in particular create_domain, create_cluster, and create_params. + in particular create_model_space, create_cluster, and create_params. 2) a global function named create_project. the function accepts a namespace object from the argument parser. @@ -307,6 +310,8 @@ def get_cli_parser(default_args=None): 'default: working directory') parser.add_argument('-o', '--output-file', default=default_args.output_file, help='base path for intermediate and output files.') + parser.add_argument('-b', '--db-file', default=default_args.db_file, + help='name of an sqlite3 database file where the results should be stored.') parser.add_argument('--table-file', help='path and name of population table file for table optimization mode. ' + 'the file must have the same structure as the .pop or .dat files.') @@ -321,7 +326,7 @@ def get_cli_parser(default_args=None): parser.add_argument('--keep-levels', type=int, choices=range(5), default=default_args.keep_levels, help='task level down to which result files of best models are kept. ' - '0 = model, 1 = scan, 2 = symmetry, 3 = emitter, 4 = region.') + '0 = model, 1 = scan, 2 = domain, 3 = emitter, 4 = region.') parser.add_argument('-t', '--time-limit', type=float, default=default_args.time_limit, help='wall time limit in hours. the optimizers try to finish before the limit.') parser.add_argument('--log-file', default=default_args.log_file, diff --git a/pmsco/project.py b/pmsco/project.py index acc326e..152005e 100644 --- a/pmsco/project.py +++ b/pmsco/project.py @@ -4,7 +4,7 @@ project-independent classes which store and handle model parameters. the most important class defined here is Project. each calculation project needs to derive its own project class from it. -the Domain and Params classes are typically used unchanged. +the ModelSpace and CalculatorParams classes are typically used unchanged. @note nomenclature: the term @e parameters has several meanings in the code and documentation. the following distinctive terms are used in updated documentation sections. @@ -53,10 +53,10 @@ from pmsco.helpers import BraceMessage as BMsg logger = logging.getLogger(__name__) -ParamDomain = collections.namedtuple('ParamDomain', ['start', 'min', 'max', 'step']) +ParamSpace = collections.namedtuple('ParamSpace', ['start', 'min', 'max', 'step']) -class Domain(object): +class ModelSpace(object): """ Domain of model parameters. @@ -151,14 +151,14 @@ class Domain(object): @param name (string) name of the parameter. - @return named tuple ParamDomain(start, min, max, step) of the parameter. + @return named tuple ParamSpace(start, min, max, step) of the parameter. @raise IndexError if the parameter is not defined. """ - return ParamDomain(self.start[name], self.min[name], self.max[name], self.step[name]) + return ParamSpace(self.start[name], self.min[name], self.max[name], self.step[name]) -class Params(object): +class CalculatorParams(object): """ calculation parameters for a single scattering calculation job. @@ -166,7 +166,7 @@ class Params(object): the class can hold parameters for both the MSC and EDAC codes. some parameters are used by both codes, others are used just by one of them. - newer features such as multiple emitters, multiple symmetries, and others are supported in EDAC mode only. + newer features such as multiple emitters, multiple domains, and others are supported in EDAC mode only. MSC mode is currently not maintained. objects of this class are created by the implementation of the create_params() method @@ -253,7 +253,7 @@ class Params(object): def __init__(self): self.title = "default parameters" - self.comment = "set by project.Params()" + self.comment = "set by project.CalculatorParams()" self.cluster_file = "" self.output_file = "" self.scan_file = "" @@ -580,7 +580,7 @@ class Project(object): the results include a measure of the quality of the simulated data compared to experimental data. each calculation project must derive from this class. - it must implement the create_domain(), create_cluster(), and create_params() methods. + it must implement the create_model_space(), create_cluster(), and create_params() methods. the other methods and attributes of this class are for passing command line parameters to the calculation modules. @@ -621,14 +621,14 @@ class Project(object): # # @c scans must be considered read-only. use project methods to change it. - ## @var symmetries (list of arbitrary objects) - # list of symmetries for which calculations are to be run. + ## @var domains (list of arbitrary objects) + # list of domains for which calculations are to be run. # # it is up to the derived class what kind of objects are stored in the list. # the recommended kind of objects are dictionaries which hold parameter values, # similar to the model dictionaries. # - # the list must be populated by calling the add_symmetry() method. + # the list must be populated by calling the add_domain() method. ## @var cluster_generator (ClusterGenerator object) # provides the cluster generator methods. @@ -684,6 +684,11 @@ class Project(object): # # output_dir and output_file are set at once by @ref set_output. + ## @var db_file (string) + # name of an sqlite3 database file where the calculation results should be stored. + # + # the default value is ':memory:', which creates a volatile in-memory database. + ## @var timedelta_limit (datetime.timedelta) # wall time after which no new calculations should be started. # @@ -715,7 +720,7 @@ class Project(object): # # @arg 0 = model level: combined results only. # @arg 1 = scan level: scan nodes in addition to combined results (level 0). - # @arg 2 = symmetry level: symmetry nodes in addition to level 1. + # @arg 2 = domain level: domain nodes in addition to level 1. # @arg 3 = emitter level: emitter nodes in addition to level 1. # @arg 4 = region level: region nodes in addition to level 1. @@ -738,13 +743,14 @@ class Project(object): def __init__(self): self.mode = "single" self.job_name = "" + self.job_tags = {} self.git_hash = "" self.description = "" self.features = {} self.cluster_format = mc.FMT_EDAC self.cluster_generator = mc.LegacyClusterGenerator(self) self.scans = [] - self.symmetries = [] + self.domains = [] self.optimizer_params = { 'pop_size': 0, 'seed_file': "", @@ -755,6 +761,7 @@ class Project(object): self.data_dir = "" self.output_dir = "" self.output_file = "pmsco_data" + self.db_file = ':memory:' self.timedelta_limit = datetime.timedelta(days=1) self.combined_scan = None self.combined_modf = None @@ -764,7 +771,7 @@ class Project(object): self.handler_classes = { 'model': handlers.SingleModelHandler, 'scan': handlers.ScanHandler, - 'sym': handlers.SymmetryHandler, + 'domain': handlers.DomainHandler, 'emit': handlers.EmitterHandler, 'region': handlers.SingleRegionHandler } @@ -773,27 +780,27 @@ class Project(object): self._tasks_fields = [] self._db = database.ResultsDatabase() - def create_domain(self): + def create_model_space(self): """ - create a msc_project.Domain object which defines the allowed range for model parameters. + create a project.ModelSpace object which defines the allowed range for model parameters. this method must be implemented by the actual project class. - the Domain object must declare all model parameters used in the project. + the ModelSpace object must declare all model parameters used in the project. - @return Domain object + @return ModelSpace object """ return None def create_params(self, model, index): """ - create a Params object given the model parameters and calculation index. + create a CalculatorParams object given the model parameters and calculation index. @param model (dictionary) model parameters to be used in the calculation. @param index (named tuple CalcID) calculation index. the method should consider only the following attributes: - @arg @c scan scan index (index into Project.scans) - @arg @c sym symmetry index (index into Project.symmetries) + @arg `scan` scan index (index into Project.scans) + @arg `domain` domain index (index into Project.domains) """ return None @@ -896,35 +903,35 @@ class Project(object): return scan - def clear_symmetries(self): + def clear_domains(self): """ - clear symmetries. + clear domains. - delete all symmetries in self.symmetries and empty the list. + delete all domains in self.domains and empty the list. @return: None """ - self.symmetries = [] + self.domains = [] - def add_symmetry(self, symmetry): + def add_domain(self, domain): """ - add a symmetry to the list of symmetries. + add a domain to the list of domains. - this class declares the list of symmetries. - it does not define what should be in the list of symmetries. - however, there must be an entry for each symmetry to be calculated. + this class declares the list of domains. + it does not define what should be in the list of domains. + however, there must be an entry for each domain to be calculated. if the list is empty, no calculation will be executed. - @attention initially, the symmetries list is empty. - your project needs to add at least one symmetry. + @attention initially, the domains list is empty. + your project needs to add at least one domain. otherwise, no calculation will be executed. - @param symmetry: it is up to the derived project class to specify and interpret the data stored here. - it is recommended to store a dictionary with symmetry parameters similar to the model parameters. + @param domain: it is up to the derived project class to specify and interpret the data stored here. + it is recommended to store a dictionary with domain parameters similar to the model parameters. @return: None """ - self.symmetries.append(symmetry) + self.domains.append(domain) def set_output(self, filename): """ @@ -938,14 +945,29 @@ class Project(object): self.output_file = filename path, name = os.path.split(filename) self.output_dir = path + self.job_name = name - def set_timedelta_limit(self, timedelta): + def set_timedelta_limit(self, timedelta, margin_minutes=10): """ - set the walltime limit - - timedelta (datetime.timedelta) + set the walltime limit with a safety margin. + + this method sets the internal self.timedelta_limit attribute. + by default, a safety margin of 10 minutes is subtracted to the main argument + in order to increase the probability that the process ends in time. + if this is not wanted, the project class may override the method and provide its own margin. + + the method is typically called with the command line time limit from the main module. + + @note the safety margin could be applied at various levels. + it is done here because it can easily be overridden by the project subclass. + to keep run scripts simple, the command line can be given the same time limit + as the job scheduler of the computing cluster. + + @param timedelta: (datetime.timedelta) max. duration of the calculation process (wall time). + + @param margin_minutes: (int) safety margin in minutes to subtract from timedelta. """ - self.timedelta_limit = timedelta + self.timedelta_limit = timedelta - datetime.timedelta(minutes=margin_minutes) def log_project_args(self): """ @@ -970,38 +992,40 @@ class Project(object): logger.warning("data directory: {0}".format(self.data_dir)) logger.warning("output file: {0}".format(self.output_file)) + logger.warning("database: {0}".format(self.db_file)) _files_to_keep = files.FILE_CATEGORIES - self.files.categories_to_delete logger.warning("intermediate files to keep: {0}".format(", ".join(_files_to_keep))) for idx, scan in enumerate(self.scans): - logger.warning(BMsg("scan {0}: {filename} ({emitter} {initial_state})", idx, **vars(scan))) - for idx, sym in enumerate(self.symmetries): - logger.warning(BMsg("symmetry {0}: {sym}", idx, sym=sym)) + logger.warning(f"scan {idx}: {scan.filename} ({scan.emitter} {scan.initial_state}") + for idx, dom in enumerate(self.domains): + logger.warning(f"domain {idx}: {dom}") except AttributeError: logger.warning("AttributeError in log_project_args") - def combine_symmetries(self, parent_task, child_tasks): + def combine_domains(self, parent_task, child_tasks): """ - combine results of different symmetry into one result and calculate the modulation function. + combine results of different domain into one result and calculate the modulation function. - the symmetry results are read from the file system using the indices defined by the child_tasks, + the domain results are read from the file system using the indices defined by the child_tasks, and the combined result is written to the file system with the index defined by parent_task. - by default, this method adds all symmetries with equal weight. - weights can be defined in the model dictionary with keys 'wsym0', 'wsym1', etc. + by default, this method adds all domains with equal weight. + weights can be defined in the model dictionary with keys 'wdom0', 'wdom1', etc. missing weights default to 1. - note: to avoid correlated parameters, one symmetry must always have a fixed weight. + to avoid correlated parameters, one domain must always have a fixed weight. + it is recommended to leave 'wdom0' at its default. - @param parent_task: (CalculationTask) parent task of the symmetry tasks. + @param parent_task: (CalculationTask) parent task of the domain tasks. the method must write the results to the files indicated by the @c result_filename and @c modf_filename attributes. - @param child_tasks: (sequence of CalculationTask) tasks which identify each symmetry. + @param child_tasks: (sequence of CalculationTask) tasks which identify each domain. the method must read the source data from the files indicated by the @c result_filename attributes. - the sequence is sorted by task ID, i.e., essentially, by symmetry index. + the sequence is sorted by task ID, i.e., essentially, by domain index. @return: None @@ -1009,7 +1033,7 @@ class Project(object): @raise IOError if a filename is missing - @note the weights of the symmetries (in derived classes) can be part of the optimizable model parameters. + @note the weights of the domains (in derived classes) can be part of the optimizable model parameters. the model parameters are available as the @c model attribute of the calculation tasks. """ @@ -1021,7 +1045,7 @@ class Project(object): result_data = data.copy() result_data['i'] = 0. try: - weight = task.model['wsym{}'.format(task.id.sym)] + weight = task.model['wdom{}'.format(task.id.domain)] except KeyError: weight = 1. result_data['i'] += weight * data['i'] @@ -1212,9 +1236,12 @@ class Project(object): this instance writes the header of the tasks.dat file that will receive sub-task evaluation results from the evaluate_result() method. + it also initializes the database where the task results will be stored. + this is either a volatile in-memory database or a user-specified sqlite3 database file. + @param handlers: dictionary listing the initialized task handler instances. the dictionary keys are the attribute names of pmsco.dispatch.CalcID: - 'model', 'scan', 'sym', 'emit' and 'region'. + 'model', 'scan', 'domain', 'emit' and 'region'. @return: None """ @@ -1223,8 +1250,8 @@ class Project(object): fields.extend(dispatch.CalcID._fields) fields.append("secs") fields = ["_" + f for f in fields] - dom = self.create_domain() - model_fields = list(dom.start.keys()) + mspace = self.create_model_space() + model_fields = list(mspace.start.keys()) model_fields.sort(key=lambda name: name.lower()) fields.extend(model_fields) self._tasks_fields = fields @@ -1234,9 +1261,10 @@ class Project(object): outfile.write(" ".join(fields)) outfile.write("\n") - # todo : change to file-database - self._db.connect(":memory:") - project_id = self._db.register_project(self.__class__.__name__, sys.argv[0]) + self._db.connect(self.db_file) + project_name = self.__class__.__name__ + project_module = self.__class__.__module__ + project_id = self._db.register_project(project_name, project_module) job_id = self._db.register_job(project_id, self.job_name, self.mode, @@ -1244,6 +1272,9 @@ class Project(object): self.git_hash, datetime.datetime.now(), self.description) + logger.debug(BMsg("database {db_file}, project {proj}, job {job}", + db_file=self.db_file, proj=project_id, job=job_id)) + self._db.insert_jobtags(job_id, self.job_tags) self._db.register_params(model_fields) self._db.create_models_view() @@ -1283,7 +1314,8 @@ class Project(object): with open(self.output_file + ".tasks.dat", "a") as outfile: outfile.write(" ".join(format(value) for value in values_list) + "\n") - self._db.insert_result(parent_task.id, values_dict) + db_id = self._db.insert_result(parent_task.id, values_dict) + logger.debug(BMsg("model {model}, database result {db_id}", model=parent_task.id.model, db_id=db_id)) return None @@ -1529,7 +1561,7 @@ class Project(object): """ project hook before atomic scattering factors are calculated. - this method derives modified Params and Cluster objects for the atomic scattering calculation + this method derives modified CalculatorParams and Cluster objects for the atomic scattering calculation from the original objects that will be used in the multiple scattering calculation. in the basic version, the method does not change the objects @@ -1542,7 +1574,7 @@ class Project(object): or None if no global scattering factors should be calculated. do not modify this object! - @param par: @ref pmsco.project.Params object representing the preliminary + @param par: @ref pmsco.project.CalculatorParams object representing the preliminary multiple scattering input parameters of the current task. the method can make modifications to this object instance directly. @@ -1565,7 +1597,7 @@ class Project(object): """ project hook after atomic scattering factors are calculated. - this method cleans up the Params and Cluster objects from the atomic scattering calculation + this method cleans up the CalculatorParams and Cluster objects from the atomic scattering calculation so that they can be used in the multiple scattering calculation. in the basic version, the method just passes the input parameters for model tasks @@ -1578,7 +1610,7 @@ class Project(object): (to calculate the fixed scattering factors that will be used for all models) or None if no global scattering factors should be calculated. - @param par: @ref pmsco.project.Params object representing the preliminary + @param par: @ref pmsco.project.CalculatorParams object representing the preliminary multiple scattering input parameters of the current task. @param clu: @ref pmsco.cluster.Cluster object representing the preliminary @@ -1597,18 +1629,18 @@ class Project(object): def cleanup(self): """ - delete unwanted files at the end of a project. + delete unwanted files at the end of a project and close the database. @return: None """ - self.cleanup_files() + self.cleanup_files(incomplete_models=True) self._db.disconnect() - def cleanup_files(self, keep=0): + def cleanup_files(self, keep=0, incomplete_models=False): """ - delete uninteresting files. + delete uninteresting files (any time). - these are all files that + delete all files that belong to one of the self.files.categories_to_delete categories or do not belong to one of the "best" models. @@ -1619,12 +1651,19 @@ class Project(object): this means that in total up to `n = 10 + 10 * n_scans` models may be kept, where n_scans is the number of scan files in the job. + this method can be called at any time during the calculation process. + it executes on complete models only + unless incomplete_models is True. + @param keep: minimum number of best models to keep. 0 (default): use the project parameter self.keep_best. + @param incomplete_models: (bool) delete files of incomplete models as well. + by default (False), incomplete models are not deleted. + @return None """ - self.files.delete_files() + self.files.delete_files(incomplete_models=incomplete_models) if 'rfac' in self.files.categories_to_delete: keep = max(keep, self.keep_best) keepers = self._db.query_best_task_models(self.keep_levels, keep) diff --git a/projects/common/clusters/crystals.py b/projects/common/clusters/crystals.py new file mode 100644 index 0000000..a24a5df --- /dev/null +++ b/projects/common/clusters/crystals.py @@ -0,0 +1,138 @@ +""" +@package projects.common.clusters.crystals +cluster generators for some common bulk crystals + +@author Matthias Muntwiler, matthias.muntwiler@psi.ch + +@copyright (c) 2015-19 by Paul Scherrer Institut @n +Licensed under the Apache License, Version 2.0 (the "License"); @n + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 +""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import math +import numpy as np +import os.path +import periodictable as pt +import logging + +import pmsco.cluster as cluster +import pmsco.dispatch as dispatch +import pmsco.project as project +from pmsco.helpers import BraceMessage as BMsg + +logger = logging.getLogger(__name__) + + +class ZincblendeCluster(cluster.ClusterGenerator): + def __init__(self, proj): + super(ZincblendeCluster, self).__init__(proj) + self.atomtype1 = 30 + self.atomtype2 = 16 + self.bulk_lattice = 1.0 + self.surface = (1, 1, 1) + + @classmethod + def check(cls, outfilename=None, model_dict=None, domain_dict=None): + """ + function to test and debug the cluster generator. + + to use this function, you don't need to import or initialize anything but the class. + though the project class is used internally, the result does not depend on any project settings. + + @param outfilename: name of output file for the cluster (XYZ format). + the file is written to the same directory where this module is located. + if empty or None, no file is written. + + @param model_dict: dictionary of model parameters to override the default values. + + @param domain_dict: dictionary of domain parameters to override the default values. + + @return: @ref pmsco.cluster.Cluster object + """ + proj = project.Project() + dom = project.ModelSpace() + dom.add_param('dlat', 10.) + dom.add_param('rmax', 5.0) + if model_dict: + dom.start.update(model_dict) + + try: + proj.domains[0].update({'zrot': 0.}) + except IndexError: + proj.add_domain({'zrot': 0.}) + if domain_dict: + proj.domains[0].update(domain_dict) + proj.add_scan("", 'C', '1s') + + clu_gen = cls(proj) + index = dispatch.CalcID(0, 0, 0, -1, -1) + clu = clu_gen.create_cluster(dom.start, index) + + if outfilename: + project_dir = os.path.dirname(os.path.abspath(__file__)) + outfilepath = os.path.join(project_dir, outfilename) + clu.save_to_file(outfilepath, fmt=cluster.FMT_XYZ, comment="{0} {1} {2}".format(cls, index, str(dom.start))) + + return clu + + def count_emitters(self, model, index): + return 1 + + def create_cluster(self, model, index): + """ + calculate a specific set of atom positions given the optimizable parameters. + + @param model (dict) optimizable parameters + @arg model['dlat'] bulk lattice constant in Angstrom + @arg model['rmax'] cluster radius + @arg model['phi'] azimuthal rotation angle in degrees + + @param dom (dict) domain + @arg dom['term'] surface termination + """ + clu = cluster.Cluster() + clu.comment = "{0} {1}".format(self.__class__, index) + clu.set_rmax(model['rmax']) + a_lat = model['dlat'] + dom = self.project.domains[index] + try: + term = int(dom['term']) + except ValueError: + term = pt.elements.symbol(dom['term'].strip().number) + + if self.surface == (0, 0, 1): + # identity matrix + m = np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1]]) + elif self.surface == (1, 1, 1): + # this will map the [111] direction onto the z-axis + m1 = np.array([1, -1, 0]) * math.sqrt(1/2) + m2 = np.array([0.5, 0.5, -1]) * math.sqrt(2/3) + m3 = np.array([1, 1, 1]) * math.sqrt(1/3) + m = np.array([m1, m2, m3]) + else: + raise ValueError("unsupported surface specification") + + # lattice vectors + a1 = np.matmul(m, np.array((1.0, 0.0, 0.0)) * a_lat) + a2 = np.matmul(m, np.array((0.0, 1.0, 0.0)) * a_lat) + a3 = np.matmul(m, np.array((0.0, 0.0, 1.0)) * a_lat) + + # basis + b1 = [np.array((0.0, 0.0, 0.0)), (a2 + a3) / 2, (a3 + a1) / 2, (a1 + a2) / 2] + if term == self.atomtype1: + d1 = np.array((0, 0, 0)) + d2 = (a1 + a2 + a3) / 4 + else: + d1 = -(a1 + a2 + a3) / 4 + d2 = np.array((0, 0, 0)) + for b in b1: + clu.add_bulk(self.atomtype1, b + d1, a1, a2, a3) + clu.add_bulk(self.atomtype2, b + d2, a1, a2, a3) + + return clu diff --git a/projects/demo/fcc.py b/projects/demo/fcc.py index a4a0abe..c103d7a 100644 --- a/projects/demo/fcc.py +++ b/projects/demo/fcc.py @@ -91,7 +91,7 @@ class FCC111Project(mp.Project): par['V0'] = inner potential par['Zsurf'] = position of surface """ - params = mp.Params() + params = mp.CalculatorParams() params.title = "fcc(111)" params.comment = "{0} {1}".format(self.__class__, index) @@ -133,11 +133,11 @@ class FCC111Project(mp.Project): return params - def create_domain(self): + def create_model_space(self): """ - define the domain of the optimization parameters. + define the model space of the optimization parameters. """ - dom = mp.Domain() + dom = mp.ModelSpace() if self.mode == "single": dom.add_param('rmax', 5.00, 5.00, 15.00, 2.50) @@ -190,7 +190,7 @@ def create_project(): project.scan_dict['alpha'] = {'filename': os.path.join(project_dir, "demo_alpha_scan.etp"), 'emitter': "Ni", 'initial_state': "3s"} - project.add_symmetry({'default': 0.0}) + project.add_domain({'default': 0.0}) return project @@ -229,8 +229,9 @@ def set_project_args(project, project_args): try: if project_args.initial_state: - project.initial_state = project_args.initial_state - logger.warning(BMsg("override initial states to {0}", project.initial_state)) + for scan in project.scans: + scan.initial_state = project_args.initial_state + logger.warning(f"override initial states of all scans to {project_args.initial_state}") except AttributeError: pass diff --git a/projects/demo/molecule.py b/projects/demo/molecule.py new file mode 100644 index 0000000..cdbf02c --- /dev/null +++ b/projects/demo/molecule.py @@ -0,0 +1,384 @@ +""" +@package pmsco.projects.demo.molecule +scattering calculation project for single molecules + +the atomic positions are read from a molecule file. +cluster file, emitter (by chemical symbol), initial state and kinetic energy are specified on the command line. +there are no structural parameters. + +@author Matthias Muntwiler, matthias.muntwiler@psi.ch + +@copyright (c) 2015-20 by Paul Scherrer Institut @n +Licensed under the Apache License, Version 2.0 (the "License"); @n + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 +""" + +import math +import numpy as np +import os.path +from pathlib import Path +import periodictable as pt +import argparse +import logging + +# noinspection PyUnresolvedReferences +from pmsco.calculators.calculator import InternalAtomicCalculator +# noinspection PyUnresolvedReferences +from pmsco.calculators.edac import EdacCalculator +# noinspection PyUnresolvedReferences +from pmsco.calculators.phagen.runner import PhagenCalculator +import pmsco.cluster as cluster +from pmsco.data import calc_modfunc_loess +# noinspection PyUnresolvedReferences +import pmsco.elements.bindingenergy +from pmsco.helpers import BraceMessage as BMsg +import pmsco.project as project + +logger = logging.getLogger(__name__) + + +class MoleculeFileCluster(cluster.ClusterGenerator): + """ + cluster generator based on external file. + + work in progress. + """ + def __init__(self, project): + super(MoleculeFileCluster, self).__init__(project) + self.base_cluster = None + + def load_base_cluster(self): + """ + load and cache the project-defined coordinate file. + + the file path is set in self.project.cluster_file. + the file must be in XYZ (.xyz) or PMSCO cluster (.clu) format (cf. pmsco.cluster module). + + @return: Cluster object (also referenced by self.base_cluster) + """ + if self.base_cluster is None: + clu = cluster.Cluster() + clu.set_rmax(120.0) + p = Path(self.project.cluster_file) + ext = p.suffix + if ext == ".xyz": + fmt = cluster.FMT_XYZ + elif ext == ".clu": + fmt = cluster.FMT_PMSCO + else: + raise ValueError(f"unknown cluster file extension {ext}") + clu.load_from_file(self.project.cluster_file, fmt=fmt) + self.base_cluster = clu + + return self.base_cluster + + def count_emitters(self, model, index): + """ + count the number of emitter configurations. + + the method creates the full cluster and counts the emitters. + + @param model: model parameters. + @param index: scan and domain are used by the create_cluster() method, + emit decides whether the method returns the number of configurations (-1), + or the number of emitters in the specified configuration (>= 0). + @return: number of emitter configurations. + """ + clu = self.create_cluster(model, index) + return clu.get_emitter_count() + + def create_cluster(self, model, index): + """ + import a cluster from a coordinate file (XYZ format). + + the method does the following: + - load the cluster file specified by self.cluster_file. + - trim the cluster according to model['rmax']. + - mark the 6 nitrogen atoms at the center of the trimer as emitters. + + @param model: rmax is the trim radius of the cluster in units of the surface lattice constant. + + @param index (named tuple CalcID) calculation index. + this method uses the domain index to look up domain parameters in + `pmsco.project.Project.domains`. + `index.emit` selects whether a single-emitter (>= 0) or all-emitter cluster (== -1) is returned. + + @return pmsco.cluster.Cluster object + """ + self.load_base_cluster() + clu = cluster.Cluster() + clu.copy_from(self.base_cluster) + clu.comment = f"{self.__class__}, {index}" + dom = self.project.domains[index.domain] + + # trim + clu.set_rmax(model['rmax']) + clu.trim_sphere(clu.rmax) + + # emitter selection + idx_emit = np.where(clu.data['s'] == self.project.scans[index.scan].emitter) + assert isinstance(idx_emit, tuple) + idx_emit = idx_emit[0] + if index.emit >= 0: + idx_emit = idx_emit[index.emit] + clu.data['e'][idx_emit] = 1 + + # rotation + if 'xrot' in model: + clu.rotate_z(model['xrot']) + elif 'xrot' in dom: + clu.rotate_z(dom['xrot']) + if 'yrot' in model: + clu.rotate_z(model['yrot']) + elif 'yrot' in dom: + clu.rotate_z(dom['yrot']) + if 'zrot' in model: + clu.rotate_z(model['zrot']) + elif 'zrot' in dom: + clu.rotate_z(dom['zrot']) + + logger.info(f"cluster for calculation {index}: " + f"{clu.get_atom_count()} atoms, {clu.get_emitter_count()} emitters") + + return clu + + +class MoleculeProject(project.Project): + """ + general molecule project. + + the following model parameters are used: + + @arg `model['zsurf']` : position of surface above molecule (angstrom) + @arg `model['Texp']` : experimental temperature (K) + @arg `model['Tdeb']` : debye temperature (K) + @arg `model['V0']` : inner potential (eV) + @arg `model['rmax']` : cluster radius (angstrom) + @arg `model['ares']` : angular resolution (degrees, FWHM) + @arg `model['distm']` : dmax for EDAC (angstrom) + + the following domain parameters are used. + they can also be specified as model parameters. + + @arg `'xrot'` : rotation about x-axis (applied first) (deg) + @arg `'yrot'` : rotation about y-axis (applied after x) (deg) + @arg `'zrot'` : rotation about z-axis (applied after x and y) (deg) + + the project parameters are: + + @arg `cluster_file` : name of cluster file of template molecule. + default: "dpdi-trimer.xyz" + """ + def __init__(self): + """ + initialize a project instance + """ + super(MoleculeProject, self).__init__() + self.model_space = project.ModelSpace() + self.scan_dict = {} + self.cluster_file = "demo-cluster.xyz" + self.cluster_generator = MoleculeFileCluster(self) + self.atomic_scattering_factory = PhagenCalculator + self.multiple_scattering_factory = EdacCalculator + self.phase_files = {} + self.rme_files = {} + self.modf_smth_ei = 0.5 + + def create_params(self, model, index): + """ + set a specific set of parameters given the optimizable parameters. + + @param model: (dict) optimization parameters + this method requires zsurf, V0, Texp, Tdeb, ares and distm. + + @param index (named tuple CalcID) calculation index. + this method formats the index into the comment line. + """ + params = project.CalculatorParams() + + params.title = "molecule demo" + params.comment = f"{self.__class__} {index}" + params.cluster_file = "" + params.output_file = "" + initial_state = self.scans[index.scan].initial_state + params.initial_state = initial_state + emitter = self.scans[index.scan].emitter + params.binding_energy = pt.elements.symbol(emitter).binding_energy[initial_state] + params.polarization = "H" + params.z_surface = model['zsurf'] + params.inner_potential = model['V0'] + params.work_function = 4.5 + params.polar_incidence_angle = 60.0 + params.azimuthal_incidence_angle = 0.0 + params.angular_resolution = model['ares'] + params.experiment_temperature = model['Texp'] + params.debye_temperature = model['Tdeb'] + params.phase_files = self.phase_files + params.rme_files = self.rme_files + # edac_interface only + params.emitters = [] + params.lmax = 15 + params.dmax = model['distm'] + params.orders = [20] + + return params + + def create_model_space(self): + """ + define the range of model parameters. + + see the class description for a list of parameters. + """ + + return self.model_space + + # noinspection PyUnusedLocal + def calc_modulation(self, data, model): + """ + calculate the modulation function with project-specific smoothing factor + + see @ref pmsco.pmsco.project.calc_modulation. + + @param data: (numpy.ndarray) experimental data in ETPI, or ETPAI format. + + @param model: (dict) model parameters of the calculation task. not used. + + @return copy of the data array with the modulation function in the 'i' column. + """ + return calc_modfunc_loess(data, smth=self.modf_smth_ei) + + +def create_model_space(mode): + """ + define the model space. + """ + dom = project.ModelSpace() + + if mode == "single": + dom.add_param('zsurf', 1.20) + dom.add_param('Texp', 300.00) + dom.add_param('Tdeb', 100.00) + dom.add_param('V0', 10.00) + dom.add_param('rmax', 50.00) + dom.add_param('ares', 5.00) + dom.add_param('distm', 5.00) + dom.add_param('wdom1', 1.0) + dom.add_param('wdom2', 1.0) + dom.add_param('wdom3', 1.0) + dom.add_param('wdom4', 1.0) + dom.add_param('wdom5', 1.0) + else: + raise ValueError(f"undefined model space for {mode} optimization") + + return dom + + +def create_project(): + """ + create the project instance. + """ + + proj = MoleculeProject() + proj_dir = os.path.dirname(os.path.abspath(__file__)) + proj.project_dir = proj_dir + + # scan dictionary + # to select any number of scans, add their dictionary keys as scans option on the command line + proj.scan_dict['empty'] = {'filename': os.path.join(proj_dir, "../common/empty-hemiscan.etpi"), + 'emitter': "N", 'initial_state': "1s"} + + proj.mode = 'single' + proj.model_space = create_model_space(proj.mode) + proj.job_name = 'molecule0000' + proj.description = 'molecule demo' + + return proj + + +def set_project_args(project, project_args): + """ + set the project arguments. + + @param project: project instance + + @param project_args: (Namespace object) project arguments. + """ + + scans = [] + try: + if project_args.scans: + scans = project_args.scans + else: + logger.error("missing scan argument") + exit(1) + except AttributeError: + logger.error("missing scan argument") + exit(1) + + for scan_key in scans: + scan_spec = project.scan_dict[scan_key] + project.add_scan(**scan_spec) + + try: + project.cluster_file = os.path.abspath(project_args.cluster_file) + project.cluster_generator = MoleculeFileCluster(project) + except (AttributeError, TypeError): + logger.error("missing cluster-file argument") + exit(1) + + try: + if project_args.emitter: + for scan in project.scans: + scan.emitter = project_args.emitter + logger.warning(f"override emitters of all scans to {project_args.emitter}") + except AttributeError: + pass + + try: + if project_args.initial_state: + for scan in project.scans: + scan.initial_state = project_args.initial_state + logger.warning(f"override initial states of all scans to {project_args.initial_state}") + except AttributeError: + pass + + try: + if project_args.energy: + for scan in project.scans: + scan.energies = np.asarray((project_args.energy, )) + logger.warning(f"override scan energy of all scans to {project_args.energy}") + except AttributeError: + pass + + try: + if project_args.symmetry: + for angle in np.linspace(0, 360, num=project_args.symmetry, endpoint=False): + project.add_domain({'xrot': 0., 'yrot': 0., 'zrot': angle}) + logger.warning(f"override rotation symmetry to {project_args.symmetry}") + except AttributeError: + pass + + +def parse_project_args(_args): + parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter) + + # main arguments + parser.add_argument('--scans', nargs="*", + help="nick names of scans to use in calculation (see create_project function)") + parser.add_argument('--cluster-file', + help="path name of molecule file (xyz format).") + + # conditional arguments + parser.add_argument('--emitter', + help="emitter: chemical symbol") + parser.add_argument('--initial-state', + help="initial state term: e.g. 2p1/2") + parser.add_argument('--energy', type=float, + help="kinetic energy (eV)") + parser.add_argument('--symmetry', type=int, default=1, + help="n-fold rotational symmetry") + + parsed_args = parser.parse_args(_args) + return parsed_args diff --git a/projects/twoatom/twoatom.py b/projects/twoatom/twoatom.py index 4102902..b6b4bde 100644 --- a/projects/twoatom/twoatom.py +++ b/projects/twoatom/twoatom.py @@ -17,6 +17,9 @@ import numpy as np import os.path import periodictable as pt +from pmsco.calculators.calculator import InternalAtomicCalculator +from pmsco.calculators.edac import EdacCalculator +from pmsco.calculators.phagen.runner import PhagenCalculator import pmsco.cluster as mc import pmsco.project as mp from pmsco.helpers import BraceMessage as BMsg @@ -152,6 +155,17 @@ class TwoatomProject(mp.Project): self.cluster_generator.model_dict['dAB'] = 'dNNi' self.cluster_generator.model_dict['th'] = 'pNNi' self.cluster_generator.model_dict['ph'] = 'aNNi' + self.atomic_scattering_factory = PhagenCalculator + self.multiple_scattering_factory = EdacCalculator + self.phase_files = {} + self.rme_files = {} + self.bindings = {} + self.bindings['N'] = {'1s': 409.9} + self.bindings['B'] = {'1s': 188.0} + self.bindings['Ni'] = {'2s': 1008.6, + '2p': (870.0 + 852.7) / 2, '2p1/2': 870.0, '2p3/2': 852.7, + '3s': 110.8, + '3p': (68.0 + 66.2) / 2, '3p1/2': 68.0, '3p3/2': 66.2} def create_params(self, model, index): """ @@ -159,40 +173,40 @@ class TwoatomProject(mp.Project): @param model: (dict) optimizable parameters """ - params = mp.Params() + params = mp.CalculatorParams() params.title = "two-atom demo" params.comment = "{0} {1}".format(self.__class__, index) params.cluster_file = "" params.output_file = "" params.initial_state = self.scans[index.scan].initial_state - params.spherical_order = 2 + initial_state = self.scans[index.scan].initial_state + params.initial_state = initial_state + emitter = self.scans[index.scan].emitter + params.binding_energy = self.bindings[emitter][initial_state] params.polarization = "H" - params.scattering_level = 5 - params.fcut = 15.0 - params.cut = 15.0 - params.angular_resolution = 0.0 - params.lattice_constant = 1.0 params.z_surface = model['Zsurf'] - params.phase_files = {self.cluster_generator.atom_types['A']: "", - self.cluster_generator.atom_types['B']: ""} - params.msq_displacement = {self.cluster_generator.atom_types['A']: 0.01, - self.cluster_generator.atom_types['B']: 0.0} - params.planewave_attenuation = 1.0 params.inner_potential = model['V0'] params.work_function = 3.6 - params.symmetry_range = 360.0 params.polar_incidence_angle = 60.0 params.azimuthal_incidence_angle = 0.0 - params.vibration_model = "P" - params.substrate_atomic_mass = 58.69 params.experiment_temperature = 300.0 params.debye_temperature = 356.0 - params.debye_wavevector = 1.7558 - params.rme_minus_value = 0.0 + + if self.phase_files: + state = emitter + initial_state + try: + params.phase_files = self.phase_files[state] + except KeyError: + params.phase_files = {} + logger.warning("no phase files found for {} - using default calculator".format(state)) + + params.rme_files = {} + params.rme_minus_value = 0.1 params.rme_minus_shift = 0.0 params.rme_plus_value = 1.0 params.rme_plus_shift = 0.0 + # used by EDAC only params.emitters = [] params.lmax = 15 @@ -201,11 +215,11 @@ class TwoatomProject(mp.Project): return params - def create_domain(self): + def create_model_space(self): """ define the domain of the optimization parameters. """ - dom = mp.Domain() + dom = mp.ModelSpace() if self.mode == "single": dom.add_param('dNNi', 2.109, 2.000, 2.250, 0.050) @@ -308,7 +322,7 @@ def set_project_args(project, project_args): project.add_scan(**scan_spec) logger.info(BMsg("add scan {filename} ({emitter} {initial_state})", **scan_spec)) - project.add_symmetry({'default': 0.0}) + project.add_domain({'default': 0.0}) def parse_project_args(_args): diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..a63386d --- /dev/null +++ b/setup.py @@ -0,0 +1,26 @@ +#!/usr/bin/env python + +""" +@file package distribution information. + +preliminary - not tested. +""" + +try: + from setuptools import setup +except ImportError: + from distutils.core import setup + +config = { + 'name': "pmsco", + 'description': "PEARL Multiple-Scattering Cluster Calculation and Structural Optimization", + 'url': "https://git.psi.ch/pearl/pmsco", + 'author': "Matthias Muntwiler", + 'author_email': "matthias.muntwiler@psi.ch", + 'version': '0.1', + 'packages': ['pmsco'], + 'scripts': [], + 'install_requires': ['numpy','periodictable','statsmodels','mpi4py','nose', 'scipy'] +} + +setup(**config) diff --git a/tests/calculators/__init__.py b/tests/calculators/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/calculators/phagen/__init__.py b/tests/calculators/phagen/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/calculators/phagen/test_translator.py b/tests/calculators/phagen/test_translator.py new file mode 100644 index 0000000..6e7b4c0 --- /dev/null +++ b/tests/calculators/phagen/test_translator.py @@ -0,0 +1,79 @@ +""" +@package tests.calculators.phagen.test_translator +unit tests for pmsco.calculators.phagen.translator + +the purpose of these tests is to check whether the code runs as expected in a particular environment. + +to run the tests, change to the directory which contains the tests directory, and execute =nosetests=. + +@pre nose must be installed (python-nose package on Debian). + +@author Matthias Muntwiler, matthias.muntwiler@psi.ch + +@copyright (c) 2015-19 by Paul Scherrer Institut @n +Licensed under the Apache License, Version 2.0 (the "License"); @n + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 +""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import unittest +from pmsco.calculators.phagen import translator + + +class TestModule(unittest.TestCase): + def setUp(self): + # before each test method + pass + + def tearDown(self): + # after each test method + pass + + @classmethod + def setup_class(cls): + # before any methods in this class + pass + + @classmethod + def teardown_class(cls): + # teardown_class() after any methods in this class + pass + + def test_state_to_edge(self): + self.assertEqual(translator.state_to_edge('1s'), 'k') + self.assertEqual(translator.state_to_edge('2s'), 'l1') + self.assertEqual(translator.state_to_edge('3s'), 'm1') + self.assertEqual(translator.state_to_edge('4s'), 'n1') + self.assertEqual(translator.state_to_edge('5s'), 'o1') + + self.assertEqual(translator.state_to_edge('2p'), 'l2') + self.assertEqual(translator.state_to_edge('3p'), 'm2') + self.assertEqual(translator.state_to_edge('4p'), 'n2') + self.assertEqual(translator.state_to_edge('5p'), 'o2') + + self.assertEqual(translator.state_to_edge('2p1/2'), 'l2') + self.assertEqual(translator.state_to_edge('3p1/2'), 'm2') + self.assertEqual(translator.state_to_edge('4p1/2'), 'n2') + self.assertEqual(translator.state_to_edge('5p1/2'), 'o2') + + self.assertEqual(translator.state_to_edge('2p3/2'), 'l3') + self.assertEqual(translator.state_to_edge('3p3/2'), 'm3') + self.assertEqual(translator.state_to_edge('4p3/2'), 'n3') + self.assertEqual(translator.state_to_edge('5p3/2'), 'o3') + + self.assertEqual(translator.state_to_edge('3d'), 'm4') + self.assertEqual(translator.state_to_edge('4d'), 'n4') + self.assertEqual(translator.state_to_edge('5d'), 'o4') + + self.assertEqual(translator.state_to_edge('3d3/2'), 'm4') + self.assertEqual(translator.state_to_edge('4d3/2'), 'n4') + self.assertEqual(translator.state_to_edge('5d3/2'), 'o4') + + self.assertEqual(translator.state_to_edge('3d5/2'), 'm5') + self.assertEqual(translator.state_to_edge('4d5/2'), 'n5') + self.assertEqual(translator.state_to_edge('5d5/2'), 'o5') diff --git a/tests/test_cluster.py b/tests/test_cluster.py index 24ba119..9dc61f0 100644 --- a/tests/test_cluster.py +++ b/tests/test_cluster.py @@ -175,6 +175,22 @@ class TestClusterFunctions(unittest.TestCase): np.testing.assert_allclose(layers, np.asarray([-0.3, -0.2, -0.1, 0.0, +0.1]), atol=0.001) + def test_get_center(self): + clu = mc.Cluster() + clu.add_atom(1, np.asarray([1, 0, 0]), 0) + clu.add_atom(2, np.asarray([0, 0, 1]), 0) + clu.add_atom(1, np.asarray([0, 1, 0]), 0) + clu.add_atom(2, np.asarray([-1, -1, -1]), 0) + v0 = np.asarray([0, 0, 0]) + v1 = np.asarray([1/2, 1/2, 0]) + v2 = np.asarray([-1/2, -1/2, 0]) + v = clu.get_center() + np.testing.assert_allclose(v, v0, atol=0.001) + v = clu.get_center(element=1) + np.testing.assert_allclose(v, v1, atol=0.001) + v = clu.get_center(element="He") + np.testing.assert_allclose(v, v2, atol=0.001) + def test_relax(self): clu = mc.Cluster() clu.add_atom(1, np.asarray([1, 0, 1]), 0) @@ -184,7 +200,7 @@ class TestClusterFunctions(unittest.TestCase): clu.add_atom(2, np.asarray([0, 1, -3]), 0) idx = clu.relax(-0.3, -0.1, 2) - np.testing.assert_almost_equal(idx, np.asarray([[2, 4]])) + np.testing.assert_almost_equal(idx, np.asarray([2, 4])) np.testing.assert_allclose(clu.get_position(0), np.asarray([1, 0, 1]), atol=1e-6) np.testing.assert_allclose(clu.get_position(1), np.asarray([1, 0, 0]), atol=1e-6) np.testing.assert_allclose(clu.get_position(2), np.asarray([0, 1, -1.1]), atol=1e-6) @@ -224,18 +240,81 @@ class TestClusterFunctions(unittest.TestCase): np.testing.assert_allclose(clu.get_position(1), np.asarray([-1, 0, 0]), atol=1e-6) np.testing.assert_allclose(clu.get_position(2), np.asarray([0, 0, 1]), atol=1e-6) + def test_translate(self): + clu = mc.Cluster() + clu.add_atom(1, np.asarray([1, 0, 0]), 0) + clu.add_atom(2, np.asarray([0, 1, 0]), 0) + clu.add_atom(3, np.asarray([0, 0, 1]), 0) + + v = np.array((0.1, 0.2, 0.3)) + shift = clu.translate(v) + np.testing.assert_allclose(clu.get_position(0), np.asarray([1.1, 0.2, 0.3]), atol=1e-6) + np.testing.assert_allclose(clu.get_position(1), np.asarray([0.1, 1.2, 0.3]), atol=1e-6) + np.testing.assert_allclose(clu.get_position(2), np.asarray([0.1, 0.2, 1.3]), atol=1e-6) + np.testing.assert_allclose(shift, np.asarray([0, 1, 2])) + + shift = clu.translate(v, element=3) + np.testing.assert_allclose(clu.get_position(0), np.asarray([1.1, 0.2, 0.3]), atol=1e-6) + np.testing.assert_allclose(clu.get_position(1), np.asarray([0.1, 1.2, 0.3]), atol=1e-6) + np.testing.assert_allclose(clu.get_position(2), np.asarray([0.2, 0.4, 1.6]), atol=1e-6) + np.testing.assert_allclose(shift, np.asarray([2])) + def test_add_layer(self): clu = mc.Cluster() - # from hbncu project - b_surf = 2.50 - clu.set_rmax(4.0) - b1 = np.array((b_surf, 0.0, 0.0)) - b2 = np.array((b_surf / 2.0, b_surf * math.sqrt(3.0) / 2.0, 0.0)) - a1 = -10.0 * b1 - 10.0 * b2 - emitter = np.array((0.0, 0.0, 0.0)) + clu.set_rmax(2.0) + b1 = np.array((1.0, 0.0, 0.0)) + b2 = np.array((0.0, 1.0, 0.0)) + a1 = np.array((0.1, 0.0, -0.1)) clu.add_layer(7, a1, b1, b2) - pos = clu.find_positions(pos=emitter) - self.assertEqual(1, len(pos)) + + exp_pos = [[-0.9, -1.0, -0.1], [0.1, -1.0, -0.1], [1.1, -1.0, -0.1], + [-1.9, 0.0, -0.1], [-0.9, 0.0, -0.1], [0.1, 0.0, -0.1], [1.1, 0.0, -0.1], + [-0.9, 1.0, -0.1], [0.1, 1.0, -0.1], [1.1, 1.0, -0.1]] + + nn = len(exp_pos) + self.assertEqual(nn, clu.data.shape[0]) + self.assertEqual(nn, clu.get_atom_count()) + act_pos = np.sort(clu.get_positions(), axis=0) + exp_pos = np.sort(np.array(exp_pos), axis=0) + np.testing.assert_allclose(act_pos, exp_pos) + act_idx = np.unique(clu.data['i']) + self.assertEqual(nn, act_idx.shape[0]) + act_typ = (clu.data['t'] == 7).nonzero()[0] + self.assertEqual(nn, act_typ.shape[0]) + + def test_add_bulk(self): + clu = mc.Cluster() + clu.set_rmax(2.0) + b1 = np.array((1.0, 0.0, 0.0)) + b2 = np.array((0.0, 1.0, 0.0)) + b3 = np.array((0.0, 0.0, 1.0)) + a1 = np.array((0.1, 0.0, -0.1)) + z_surf = 0.8 + clu.add_bulk(7, a1, b1, b2, b3, z_surf=z_surf) + + r_great = max(clu.rmax, np.linalg.norm(a1)) + n1 = max(int(r_great / np.linalg.norm(b1)) + 1, 4) * 3 + n2 = max(int(r_great / np.linalg.norm(b2)) + 1, 4) * 3 + n3 = max(int(r_great / np.linalg.norm(b3)) + 1, 4) * 3 + exp_pos = [] + nn = 0 + for i1 in range(-n1, n1 + 1): + for i2 in range(-n2, n2 + 1): + for i3 in range(-n3, n3 + 1): + v = a1 + b1 * i1 + b2 * i2 + b3 * i3 + if np.linalg.norm(v) <= clu.rmax and v[2] <= z_surf: + exp_pos.append(v) + nn += 1 + + self.assertEqual(nn, clu.data.shape[0]) + self.assertEqual(nn, clu.get_atom_count()) + act_pos = np.sort(clu.get_positions(), axis=0) + exp_pos = np.sort(np.array(exp_pos), axis=0) + np.testing.assert_allclose(act_pos, exp_pos) + act_idx = np.unique(clu.data['i']) + self.assertEqual(nn, act_idx.shape[0]) + act_typ = (clu.data['t'] == 7).nonzero()[0] + self.assertEqual(nn, act_typ.shape[0]) def test_add_cluster(self): clu1 = mc.Cluster() @@ -375,6 +454,18 @@ class TestClusterFunctions(unittest.TestCase): f = BytesIO() pos = np.asarray((-1, -1, 0)) clu.set_emitter(pos=pos) + clu.save_to_file(f, mc.FMT_PMSCO, "qwerty", emitters_only=True) + f.seek(0) + line = f.readline() + self.assertEqual(line, b"# index element symbol class x y z emitter charge\n", b"line 1: " + line) + line = f.readline() + self.assertRegexpMatches(line, b"[0-9]+ +1 +H +[0-9]+ +[0.]+ +[0.]+ +[0.]+ +1 +[0.]", b"line 3: " + line) + line = f.readline() + self.assertRegexpMatches(line, b"[0-9]+ +14 +Si +[0-9]+ +[01.-]+ +[01.-]+ +[0.]+ +1 +[0.]", b"line 4: " + line) + line = f.readline() + self.assertEqual(b"", line, b"end of file") + + f = BytesIO() clu.save_to_file(f, mc.FMT_XYZ, "qwerty", emitters_only=True) f.seek(0) line = f.readline() @@ -388,6 +479,37 @@ class TestClusterFunctions(unittest.TestCase): line = f.readline() self.assertEqual(b"", line, b"end of file") + def test_load_from_file(self): + f = BytesIO() + f.write(b"2\n") + f.write(b"qwerty\n") + f.write(b"H 0.5 0.6 0.7\n") + f.write(b"Si -1.5 -1.6 -1.7\n") + f.seek(0) + clu = mc.Cluster() + clu.load_from_file(f, fmt=mc.FMT_XYZ) + np.testing.assert_allclose(clu.data['t'], np.array([1, 14])) + np.testing.assert_allclose(clu.data['x'], np.array([0.5, -1.5])) + np.testing.assert_allclose(clu.data['y'], np.array([0.6, -1.6])) + np.testing.assert_allclose(clu.data['z'], np.array([0.7, -1.7])) + np.testing.assert_allclose(clu.data['e'], np.array([0, 0])) + np.testing.assert_allclose(clu.data['q'], np.array([0, 0])) + + f = BytesIO() + f.write(b"# index element symbol class x y z emitter charge\n") + # ['i', 't', 's', 'c', 'x', 'y', 'z', 'e', 'q'] + f.write(b"1 6 C 1 0.5 0.6 0.7 0 -0.5\n") + f.write(b"2 14 Si 2 -1.5 -1.6 -1.7 1 0.5\n") + f.seek(0) + clu = mc.Cluster() + clu.load_from_file(f, fmt=mc.FMT_PMSCO) + np.testing.assert_allclose(clu.data['t'], np.array([6, 14])) + np.testing.assert_allclose(clu.data['x'], np.array([0.5, -1.5])) + np.testing.assert_allclose(clu.data['y'], np.array([0.6, -1.6])) + np.testing.assert_allclose(clu.data['z'], np.array([0.7, -1.7])) + np.testing.assert_allclose(clu.data['e'], np.array([0, 1])) + np.testing.assert_allclose(clu.data['q'], np.array([-0.5, 0.5])) + def test_update_atoms(self): clu = mc.Cluster() clu.add_atom(1, np.asarray([0, 0, 0]), 1) @@ -409,3 +531,29 @@ class TestClusterFunctions(unittest.TestCase): clu.update_atoms(other, {'c'}) expected = np.asarray((1, 3, 2, 3, 2, 4)) np.testing.assert_array_equal(expected, clu.data['c']) + + def test_calc_scattering_angles(self): + clu = mc.Cluster() + ref_em = np.asarray([0.1, -0.1, 0.5]) + ref_th = np.asarray([0., 15., 90., 100., 120.]) + ref_ph = np.asarray([0., 90., 180., 270., 360.]) + ref_di = np.asarray([0.5, 1.0, 1.5, 2.0, 2.5]) + exp_th = ref_th[0:4] + exp_ph = ref_ph[0:4] + exp_di = ref_di[0:4] + sel_ph = exp_ph > 180. + exp_ph[sel_ph] = exp_ph[sel_ph] - 360. + + idx_em = clu.add_atom(1, ref_em, 1) + for i, r in enumerate(ref_di): + v = np.asarray([ + r * math.cos(math.radians(ref_ph[i])) * math.sin(math.radians(ref_th[i])) + ref_em[0], + r * math.sin(math.radians(ref_ph[i])) * math.sin(math.radians(ref_th[i])) + ref_em[1], + r * math.cos(math.radians(ref_th[i])) + ref_em[2]]) + clu.add_atom(i, v, 0) + + result = clu.calc_scattering_angles(idx_em, 2.2) + np.testing.assert_allclose(result['index'], np.arange(1, exp_di.shape[0] + 1)) + np.testing.assert_allclose(result['polar'], exp_th, atol=1e-3) + np.testing.assert_allclose(result['azimuth'], exp_ph, atol=1e-3) + np.testing.assert_allclose(result['dist'], exp_di, rtol=1e-5) diff --git a/tests/test_database.py b/tests/test_database.py index 8354173..71eaa38 100644 --- a/tests/test_database.py +++ b/tests/test_database.py @@ -77,7 +77,7 @@ class TestDatabase(unittest.TestCase): cid1 = dispatch.CalcID(1, 2, 3, 4, -1) cid2 = db.special_params(cid1) - cid3 = {'model': 1, 'scan': 2, 'sym': 3, 'emit': 4, 'region': -1} + cid3 = {'model': 1, 'scan': 2, 'domain': 3, 'emit': 4, 'region': -1} self.assertEqual(cid2, cid3) l1 = d1.keys() @@ -91,6 +91,7 @@ class TestDatabase(unittest.TestCase): self.assertEqual(t2, t3) def setup_sample_database(self): + self.db.register_project("oldproject", "oldcode") self.db.register_project("unittest", "testcode") self.db.register_job(self.db.project_id, "testjob", "testmode", "testhost", None, datetime.datetime.now()) self.ex_model = {'parA': 1.234, 'parB': 5.678, '_model': 91, '_rfac': 0.534} @@ -101,10 +102,13 @@ class TestDatabase(unittest.TestCase): def test_register_project(self): id1 = self.db.register_project("unittest1", "Atest") self.assertIsInstance(id1, int) + self.assertEqual(id1, self.db.project_id) id2 = self.db.register_project("unittest2", "Btest") self.assertIsInstance(id2, int) + self.assertEqual(id2, self.db.project_id) id3 = self.db.register_project("unittest1", "Ctest") self.assertIsInstance(id3, int) + self.assertEqual(id3, self.db.project_id) self.assertNotEqual(id1, id2) self.assertEqual(id1, id3) c = self.db._conn.cursor() @@ -120,6 +124,47 @@ class TestDatabase(unittest.TestCase): self.assertEqual(row['name'], "unittest1") self.assertEqual(row['code'], "Atest") + def test_register_job(self): + pid1 = self.db.register_project("unittest1", "Acode") + pid2 = self.db.register_project("unittest2", "Bcode") + dt1 = datetime.datetime.now() + + # insert new job + id1 = self.db.register_job(pid1, "Ajob", "Amode", "local", "Ahash", dt1, "Adesc") + self.assertIsInstance(id1, int) + self.assertEqual(id1, self.db.job_id) + # insert another job + id2 = self.db.register_job(pid1, "Bjob", "Amode", "local", "Ahash", dt1, "Adesc") + self.assertIsInstance(id2, int) + self.assertEqual(id2, self.db.job_id) + # update first job + id3 = self.db.register_job(pid1, "Ajob", "Cmode", "local", "Chash", dt1, "Cdesc") + self.assertIsInstance(id3, int) + self.assertEqual(id3, self.db.job_id) + # insert another job with same name but in other project + id4 = self.db.register_job(pid2, "Ajob", "Dmode", "local", "Dhash", dt1, "Ddesc") + self.assertIsInstance(id4, int) + self.assertEqual(id4, self.db.job_id) + + self.assertNotEqual(id1, id2) + self.assertEqual(id1, id3) + self.assertNotEqual(id1, id4) + + c = self.db._conn.cursor() + c.execute("select count(*) from Jobs") + count = c.fetchone() + self.assertEqual(count[0], 3) + c.execute("select name, mode, machine, git_hash, datetime, description from Jobs where id=:id", {'id': id1}) + row = c.fetchone() + self.assertIsNotNone(row) + self.assertEqual(len(row), 6) + self.assertEqual(row[0], "Ajob") + self.assertEqual(row[1], "Amode") + self.assertEqual(row['machine'], "local") + self.assertEqual(str(row['datetime']), str(dt1)) + self.assertEqual(row['git_hash'], "Ahash") + self.assertEqual(row['description'], "Adesc") + def test_register_params(self): self.setup_sample_database() model5 = {'parA': 2.341, 'parC': 6.785, '_model': 92, '_rfac': 0.453} @@ -181,7 +226,7 @@ class TestDatabase(unittest.TestCase): def test_query_model_array(self): self.setup_sample_database() - index = {'_scan': -1, '_sym': -1, '_emit': -1, '_region': -1} + index = {'_scan': -1, '_domain': -1, '_emit': -1, '_region': -1} model2 = {'parA': 4.123, 'parB': 8.567, '_model': 92, '_rfac': 0.654} model3 = {'parA': 3.412, 'parB': 7.856, '_model': 93, '_rfac': 0.345} model4 = {'parA': 4.123, 'parB': 8.567, '_model': 94, '_rfac': 0.354} @@ -234,12 +279,12 @@ class TestDatabase(unittest.TestCase): model7 = {'parA': 5.123, 'parB': 6.567, '_model': 97, '_rfac': 0.154, '_gen': 1, '_particle': 7} self.db.register_params(model5) self.db.create_models_view() - model2.update({'_scan': -1, '_sym': 11, '_emit': 21, '_region': 31}) - model3.update({'_scan': 1, '_sym': 12, '_emit': 22, '_region': 32}) - model4.update({'_scan': 2, '_sym': 11, '_emit': 23, '_region': 33}) - model5.update({'_scan': 3, '_sym': 11, '_emit': 24, '_region': 34}) - model6.update({'_scan': 4, '_sym': 11, '_emit': 25, '_region': 35}) - model7.update({'_scan': 5, '_sym': -1, '_emit': -1, '_region': -1}) + model2.update({'_scan': -1, '_domain': 11, '_emit': 21, '_region': 31}) + model3.update({'_scan': 1, '_domain': 12, '_emit': 22, '_region': 32}) + model4.update({'_scan': 2, '_domain': 11, '_emit': 23, '_region': 33}) + model5.update({'_scan': 3, '_domain': 11, '_emit': 24, '_region': 34}) + model6.update({'_scan': 4, '_domain': 11, '_emit': 25, '_region': 35}) + model7.update({'_scan': 5, '_domain': -1, '_emit': -1, '_region': -1}) self.db.insert_result(model2, model2) self.db.insert_result(model3, model3) self.db.insert_result(model4, model4) @@ -248,12 +293,12 @@ class TestDatabase(unittest.TestCase): self.db.insert_result(model7, model7) # only model3, model4 and model5 fulfill all conditions and limits - fil = ['mode = "testmode"', 'sym = 11'] + fil = ['mode = "testmode"', 'domain = 11'] lim = 3 result = self.db.query_best_results(filter=fil, limit=lim) ifields = ['_db_job', '_db_model', '_db_result', - '_model', '_scan', '_sym', '_emit', '_region', + '_model', '_scan', '_domain', '_emit', '_region', '_gen', '_particle'] ffields = ['_rfac'] dt = [(f, 'i8') for f in ifields] @@ -262,7 +307,7 @@ class TestDatabase(unittest.TestCase): expected['_rfac'] = np.array([0.354, 0.354, 0.453]) expected['_model'] = np.array([94, 96, 95]) expected['_scan'] = np.array([2, 4, 3]) - expected['_sym'] = np.array([11, 11, 11]) + expected['_domain'] = np.array([11, 11, 11]) expected['_emit'] = np.array([23, 25, 24]) expected['_region'] = np.array([33, 35, 34]) expected['_gen'] = np.array([1, 1, 1]) @@ -272,7 +317,7 @@ class TestDatabase(unittest.TestCase): np.testing.assert_array_almost_equal(result['_rfac'], expected['_rfac']) np.testing.assert_array_equal(result['_model'], expected['_model']) np.testing.assert_array_equal(result['_scan'], expected['_scan']) - np.testing.assert_array_equal(result['_sym'], expected['_sym']) + np.testing.assert_array_equal(result['_domain'], expected['_domain']) np.testing.assert_array_equal(result['_emit'], expected['_emit']) np.testing.assert_array_equal(result['_region'], expected['_region']) np.testing.assert_array_equal(result['_gen'], expected['_gen']) @@ -296,7 +341,7 @@ class TestDatabase(unittest.TestCase): model_id = row['model_id'] self.assertIsInstance(model_id, int) self.assertEqual(row['scan'], index.scan) - self.assertEqual(row['sym'], index.sym) + self.assertEqual(row['domain'], index.domain) self.assertEqual(row['emit'], index.emit) self.assertEqual(row['region'], index.region) self.assertEqual(row['rfac'], result['_rfac']) @@ -342,7 +387,7 @@ class TestDatabase(unittest.TestCase): model_id = row['model_id'] self.assertIsInstance(model_id, int) self.assertEqual(row['scan'], index.scan) - self.assertEqual(row['sym'], index.sym) + self.assertEqual(row['domain'], index.domain) self.assertEqual(row['emit'], index.emit) self.assertEqual(row['region'], index.region) self.assertEqual(row['rfac'], result2['_rfac']) @@ -372,7 +417,7 @@ class TestDatabase(unittest.TestCase): @return: """ self.setup_sample_database() - index = {'_model': 15, '_scan': 16, '_sym': 17, '_emit': 18, '_region': -1} + index = {'_model': 15, '_scan': 16, '_domain': 17, '_emit': 18, '_region': -1} result1 = {'parA': 4.123, 'parB': 8.567, '_rfac': 0.654, '_particle': 21} result_id1 = self.db.insert_result(index, result1) result2 = {'parA': 5.456, '_rfac': 0.254, '_particle': 11} @@ -393,7 +438,7 @@ class TestDatabase(unittest.TestCase): model_id = row['model_id'] self.assertIsInstance(model_id, int) self.assertEqual(row['scan'], index['_scan']) - self.assertEqual(row['sym'], index['_sym']) + self.assertEqual(row['domain'], index['_domain']) self.assertEqual(row['emit'], index['_emit']) self.assertEqual(row['region'], index['_region']) self.assertEqual(row['rfac'], result2['_rfac']) @@ -418,19 +463,19 @@ class TestDatabase(unittest.TestCase): def test_query_best_task_models(self): self.setup_sample_database() - model0xxx = {'_model': 0, '_scan': -1, '_sym': -1, '_emit': -1, '_region': -1, 'parA': 4., 'parB': 8.567, '_rfac': 0.01} - model00xx = {'_model': 1, '_scan': 0, '_sym': -1, '_emit': -1, '_region': -1, 'parA': 4., 'parB': 8.567, '_rfac': 0.02} - model000x = {'_model': 2, '_scan': 0, '_sym': 0, '_emit': -1, '_region': -1, 'parA': 4., 'parB': 8.567, '_rfac': 0.03} - model01xx = {'_model': 3, '_scan': 1, '_sym': -1, '_emit': -1, '_region': -1, 'parA': 4., 'parB': 8.567, '_rfac': 0.04} - model010x = {'_model': 4, '_scan': 1, '_sym': 0, '_emit': -1, '_region': -1, 'parA': 4., 'parB': 8.567, '_rfac': 0.05} + model0xxx = {'_model': 0, '_scan': -1, '_domain': -1, '_emit': -1, '_region': -1, 'parA': 4., 'parB': 8.567, '_rfac': 0.01} + model00xx = {'_model': 1, '_scan': 0, '_domain': -1, '_emit': -1, '_region': -1, 'parA': 4., 'parB': 8.567, '_rfac': 0.02} + model000x = {'_model': 2, '_scan': 0, '_domain': 0, '_emit': -1, '_region': -1, 'parA': 4., 'parB': 8.567, '_rfac': 0.03} + model01xx = {'_model': 3, '_scan': 1, '_domain': -1, '_emit': -1, '_region': -1, 'parA': 4., 'parB': 8.567, '_rfac': 0.04} + model010x = {'_model': 4, '_scan': 1, '_domain': 0, '_emit': -1, '_region': -1, 'parA': 4., 'parB': 8.567, '_rfac': 0.05} - model1xxx = {'_model': 5, '_scan': -1, '_sym': -1, '_emit': -1, '_region': -1, 'parA': 4.123, 'parB': 8.567, '_rfac': 0.09} - model10xx = {'_model': 6, '_scan': 0, '_sym': -1, '_emit': -1, '_region': -1, 'parA': 4.123, 'parB': 8.567, '_rfac': 0.08} - model100x = {'_model': 7, '_scan': 0, '_sym': 0, '_emit': -1, '_region': -1, 'parA': 4.123, 'parB': 8.567, '_rfac': 0.07} - model11xx = {'_model': 8, '_scan': 1, '_sym': -1, '_emit': -1, '_region': -1, 'parA': 4.123, 'parB': 8.567, '_rfac': 0.06} - model110x = {'_model': 9, '_scan': 1, '_sym': 0, '_emit': -1, '_region': -1, 'parA': 4.123, 'parB': 8.567, '_rfac': 0.05} + model1xxx = {'_model': 5, '_scan': -1, '_domain': -1, '_emit': -1, '_region': -1, 'parA': 4.123, 'parB': 8.567, '_rfac': 0.09} + model10xx = {'_model': 6, '_scan': 0, '_domain': -1, '_emit': -1, '_region': -1, 'parA': 4.123, 'parB': 8.567, '_rfac': 0.08} + model100x = {'_model': 7, '_scan': 0, '_domain': 0, '_emit': -1, '_region': -1, 'parA': 4.123, 'parB': 8.567, '_rfac': 0.07} + model11xx = {'_model': 8, '_scan': 1, '_domain': -1, '_emit': -1, '_region': -1, 'parA': 4.123, 'parB': 8.567, '_rfac': 0.06} + model110x = {'_model': 9, '_scan': 1, '_domain': 0, '_emit': -1, '_region': -1, 'parA': 4.123, 'parB': 8.567, '_rfac': 0.05} - model2xxx = {'_model': 10, '_scan': -1, '_sym': -1, '_emit': -1, '_region': -1, 'parA': 4.123, 'parB': 8.567, '_rfac': 0.01} + model2xxx = {'_model': 10, '_scan': -1, '_domain': -1, '_emit': -1, '_region': -1, 'parA': 4.123, 'parB': 8.567, '_rfac': 0.01} self.db.insert_result(model0xxx, model0xxx) self.db.insert_result(model00xx, model00xx) @@ -451,6 +496,75 @@ class TestDatabase(unittest.TestCase): expected = {0, 1, 3, 6, 8, 10} self.assertEqual(result, expected) + def test_sample_project(self): + """ + test ingestion of two results + + this test uses the same call sequence as the actual pmsco code. + it has been used to debug a problem in the main code + where prevous results were overwritten. + """ + db_filename = os.path.join(self.test_dir, "sample_database.db") + lock_filename = os.path.join(self.test_dir, "sample_database.lock") + + # project + project_name = self.__class__.__name__ + project_module = self.__class__.__module__ + + # job 1 + job_name1 = "job1" + result1 = {'parA': 1.234, 'parB': 5.678, '_model': 91, '_rfac': 0.534} + task1 = dispatch.CalcID(91, -1, -1, -1, -1) + + # ingest job 1 + _db = db.ResultsDatabase() + _db.connect(db_filename, lock_filename=lock_filename) + project_id1 = _db.register_project(project_name, project_module) + job_id1 = _db.register_job(project_id1, job_name1, "test", "localhost", "", datetime.datetime.now(), "") + # _db.insert_jobtags(job_id, self.job_tags) + _db.register_params(result1.keys()) + _db.create_models_view() + result_id1 = _db.insert_result(task1, result1) + _db.disconnect() + + # job 2 + job_name2 = "job2" + result2 = {'parA': 1.345, 'parB': 5.789, '_model': 91, '_rfac': 0.654} + task2 = dispatch.CalcID(91, -1, -1, -1, -1) + + # ingest job 2 + _db = db.ResultsDatabase() + _db.connect(db_filename, lock_filename=lock_filename) + project_id2 = _db.register_project(project_name, project_module) + job_id2 = _db.register_job(project_id2, job_name2, "test", "localhost", "", datetime.datetime.now(), "") + # _db.insert_jobtags(job_id, self.job_tags) + _db.register_params(result2.keys()) + _db.create_models_view() + result_id2 = _db.insert_result(task2, result2) + _db.disconnect() + + # check jobs + _db = db.ResultsDatabase() + _db.connect(db_filename, lock_filename=lock_filename) + sql = "select * from Jobs " + c = _db._conn.execute(sql) + rows = c.fetchall() + self.assertEqual(len(rows), 2) + + # check models + sql = "select * from Models " + c = _db._conn.execute(sql) + rows = c.fetchall() + self.assertEqual(len(rows), 2) + + # check results + sql = "select * from Results " + c = _db._conn.execute(sql) + rows = c.fetchall() + self.assertEqual(len(rows), 2) + + _db.disconnect() + if __name__ == '__main__': unittest.main() diff --git a/tests/test_dispatch.py b/tests/test_dispatch.py index ce620fc..c376c64 100644 --- a/tests/test_dispatch.py +++ b/tests/test_dispatch.py @@ -99,7 +99,7 @@ class TestCalculationTask(unittest.TestCase): def test_get_mpi_message(self): result = self.sample.get_mpi_message() - expected = {'model': 11, 'scan': 12, 'sym': 13, 'emit': 14, 'region': 15} + expected = {'model': 11, 'scan': 12, 'domain': 13, 'emit': 14, 'region': 15} self.assertEqual(result['id'], expected) self.assertEqual(result['model'], self.sample.model) self.assertEqual(result['result_filename'], self.sample.result_filename) diff --git a/tests/test_genetic.py b/tests/test_genetic.py index e0f192e..bae3894 100644 --- a/tests/test_genetic.py +++ b/tests/test_genetic.py @@ -39,11 +39,11 @@ class TestPopulation(unittest.TestCase): def setUp(self): random.seed(0) self._test_dir = "" - self.domain = mp.Domain() + self.model_space = mp.ModelSpace() - self.domain.add_param('A', 1.5, 1.0, 2.0, 0.1) - self.domain.add_param('B', 2.5, 2.0, 3.0, 0.1) - self.domain.add_param('C', 3.5, 3.0, 4.0, 0.1) + self.model_space.add_param('A', 1.5, 1.0, 2.0, 0.1) + self.model_space.add_param('B', 2.5, 2.0, 3.0, 0.1) + self.model_space.add_param('C', 3.5, 3.0, 4.0, 0.1) self.expected_names = ('_gen', '_model', '_particle', '_rfac', 'A', 'B', 'C') self.size = POP_SIZE @@ -114,7 +114,7 @@ class TestPopulation(unittest.TestCase): return r def test_setup(self): - self.pop.setup(self.size, self.domain) + self.pop.setup(self.size, self.model_space) self.assertEqual(self.pop.pos.dtype.names, self.expected_names) self.assertEqual(self.pop.pos.shape, (POP_SIZE,)) np.testing.assert_array_equal(np.arange(POP_SIZE), self.pop.pos['_particle']) @@ -131,7 +131,7 @@ class TestPopulation(unittest.TestCase): def test_setup_with_results(self): data_dir = os.path.dirname(os.path.abspath(__file__)) data_file = os.path.join(data_dir, "test_swarm.setup_with_results.1.dat") - self.pop.setup(self.size, self.domain, seed_file=data_file, recalc_seed=False) + self.pop.setup(self.size, self.model_space, seed_file=data_file, recalc_seed=False) self.assertEqual(self.pop.pos.dtype.names, self.expected_names) self.assertEqual(self.pop.pos.shape, (POP_SIZE,)) @@ -158,7 +158,7 @@ class TestPopulation(unittest.TestCase): def test_setup_with_results_recalc(self): data_dir = os.path.dirname(os.path.abspath(__file__)) data_file = os.path.join(data_dir, "test_swarm.setup_with_results.1.dat") - self.pop.setup(self.size, self.domain, seed_file=data_file, recalc_seed=True) + self.pop.setup(self.size, self.model_space, seed_file=data_file, recalc_seed=True) self.assertEqual(self.pop.pos.dtype.names, self.expected_names) self.assertEqual(self.pop.pos.shape, (POP_SIZE,)) @@ -183,26 +183,26 @@ class TestPopulation(unittest.TestCase): self.assertAlmostEqual(3.5, self.pop.pos['C'][0], 3) def test_pos_gen(self): - self.pop.setup(self.size, self.domain) + self.pop.setup(self.size, self.model_space) for index, item in enumerate(self.pop.pos_gen()): self.assertIsInstance(item, dict) self.assertEqual(set(item.keys()), set(self.expected_names)) self.assertEqual(item['_particle'], index) def test_randomize(self): - self.pop.setup(self.size, self.domain) + self.pop.setup(self.size, self.model_space) self.pop.randomize() - self.assertTrue(np.all(self.pop.pos['A'] >= self.domain.min['A'])) - self.assertTrue(np.all(self.pop.pos['A'] <= self.domain.max['A'])) - self.assertGreater(np.std(self.pop.pos['A']), self.domain.step['A']) + self.assertTrue(np.all(self.pop.pos['A'] >= self.model_space.min['A'])) + self.assertTrue(np.all(self.pop.pos['A'] <= self.model_space.max['A'])) + self.assertGreater(np.std(self.pop.pos['A']), self.model_space.step['A']) def test_seed(self): - self.pop.setup(self.size, self.domain) - self.pop.seed(self.domain.start) - self.assertAlmostEqual(self.pop.pos['A'][0], self.domain.start['A'], delta=0.001) + self.pop.setup(self.size, self.model_space) + self.pop.seed(self.model_space.start) + self.assertAlmostEqual(self.pop.pos['A'][0], self.model_space.start['A'], delta=0.001) def test_add_result(self): - self.pop.setup(self.size, self.domain) + self.pop.setup(self.size, self.model_space) i_sample = 1 i_result = 0 result = self.pop.pos[i_sample] @@ -212,7 +212,7 @@ class TestPopulation(unittest.TestCase): self.assertEqual(self.pop.best[i_sample], result) def test_is_converged(self): - self.pop.setup(self.size, self.domain) + self.pop.setup(self.size, self.model_space) self.assertFalse(self.pop.is_converged()) i_sample = 0 result = self.pop.pos[i_sample] @@ -226,12 +226,12 @@ class TestPopulation(unittest.TestCase): self.assertTrue(self.pop.is_converged()) def test_save_population(self): - self.pop.setup(self.size, self.domain) + self.pop.setup(self.size, self.model_space) filename = os.path.join(self.test_dir, "test_save_population.pop") self.pop.save_population(filename) def test_save_results(self): - self.pop.setup(self.size, self.domain) + self.pop.setup(self.size, self.model_space) i_sample = 1 result = self.pop.pos[i_sample] self.pop.add_result(result, 1.0) @@ -239,17 +239,17 @@ class TestPopulation(unittest.TestCase): self.pop.save_results(filename) def test_save_array(self): - self.pop.setup(self.size, self.domain) + self.pop.setup(self.size, self.model_space) filename = os.path.join(self.test_dir, "test_save_array.pos") self.pop.save_array(filename, self.pop.pos) def test_load_array(self): n = 3 filename = os.path.join(self.test_dir, "test_load_array") - self.pop.setup(self.size, self.domain) + self.pop.setup(self.size, self.model_space) # expected array - dt_exp = self.pop.get_pop_dtype(self.domain.start) + dt_exp = self.pop.get_pop_dtype(self.model_space.start) a_exp = np.zeros((n,), dtype=dt_exp) a_exp['A'] = np.linspace(0, 1, n) a_exp['B'] = np.linspace(1, 2, n) @@ -276,13 +276,13 @@ class TestPopulation(unittest.TestCase): np.testing.assert_almost_equal(result[name], a_exp[name], err_msg=name) def test_mate_parents(self): - self.pop.setup(self.size, self.domain) + self.pop.setup(self.size, self.model_space) pos1 = self.pop.pos.copy() parents = self.pop.mate_parents(pos1) self.assertEqual(len(parents), pos1.shape[0] / 2) def test_crossover(self): - self.pop.setup(self.size, self.domain) + self.pop.setup(self.size, self.model_space) p1 = self.pop.pos[2].copy() p2 = self.pop.pos[3].copy() c1, c2 = self.pop.crossover(p1, p2) @@ -290,11 +290,11 @@ class TestPopulation(unittest.TestCase): self.assertIsInstance(c2, np.void) self.assertEqual(c1['_particle'], p1['_particle']) self.assertEqual(c2['_particle'], p2['_particle']) - for name in self.domain.start: + for name in self.model_space.start: self.assertAlmostEqual(c1[name] + c2[name], p1[name] + p2[name], msg=name) def test_mutate_weak(self): - self.pop.setup(self.size, self.domain) + self.pop.setup(self.size, self.model_space) p1 = self.pop.pos[3].copy() c1 = p1.copy() self.pop.mutate_weak(c1, 1.0) @@ -304,7 +304,7 @@ class TestPopulation(unittest.TestCase): self.assertNotAlmostEqual(c1['C'], p1['C']) def test_mutate_strong(self): - self.pop.setup(self.size, self.domain) + self.pop.setup(self.size, self.model_space) p1 = self.pop.pos[3].copy() c1 = p1.copy() self.pop.mutate_strong(c1, 1.0) @@ -314,7 +314,7 @@ class TestPopulation(unittest.TestCase): self.assertNotAlmostEqual(c1['C'], p1['C']) def test_advance_population(self): - self.pop.setup(self.size, self.domain) + self.pop.setup(self.size, self.model_space) p1 = {'A': np.linspace(1.0, 2.0, POP_SIZE), 'B': np.linspace(2.0, 3.0, POP_SIZE), @@ -335,7 +335,7 @@ class TestPopulation(unittest.TestCase): self.assertTrue(np.any(abs(self.pop.pos[name] - value) >= 0.001), msg=name) def test_convergence_1(self): - self.pop.setup(self.size, self.domain) + self.pop.setup(self.size, self.model_space) self.pop.pos['A'] = np.linspace(1.0, 2.0, POP_SIZE) self.pop.pos['B'] = np.linspace(2.0, 3.0, POP_SIZE) @@ -352,7 +352,7 @@ class TestPopulation(unittest.TestCase): def optimize_rfactor_2(self, pop_size, iterations): self.size = pop_size - self.pop.setup(self.size, self.domain) + self.pop.setup(self.size, self.model_space) for i in range(iterations): self.pop.advance_population() diff --git a/tests/test_grid.py b/tests/test_grid.py index 0b76ba7..fa7d04f 100644 --- a/tests/test_grid.py +++ b/tests/test_grid.py @@ -32,11 +32,11 @@ import pmsco.project as mp class TestPopulation(unittest.TestCase): def setUp(self): random.seed(0) - self.domain = mp.Domain() + self.model_space = mp.ModelSpace() - self.domain.add_param('A', 1.5, 1.0, 2.0, 0.2) - self.domain.add_param('B', 2.5, 2.0, 3.0, 0.25) - self.domain.add_param('C', 3.5, 3.5, 3.5, 0.0) + self.model_space.add_param('A', 1.5, 1.0, 2.0, 0.2) + self.model_space.add_param('B', 2.5, 2.0, 3.0, 0.25) + self.model_space.add_param('C', 3.5, 3.5, 3.5, 0.0) self.expected_popsize = 30 self.expected_names = ('_model', '_rfac', 'A', 'B', 'C') @@ -57,7 +57,7 @@ class TestPopulation(unittest.TestCase): pass def test_setup(self): - self.pop.setup(self.domain) + self.pop.setup(self.model_space) self.assertEqual(self.pop.positions.dtype.names, self.expected_names) self.assertEqual(self.pop.positions.shape, (self.expected_popsize,)) self.assertEqual(self.pop.model_count, self.expected_popsize) diff --git a/tests/test_population.py b/tests/test_population.py index 0608cad..a4fd1e0 100644 --- a/tests/test_population.py +++ b/tests/test_population.py @@ -41,11 +41,11 @@ class TestPopulation(unittest.TestCase): def setUp(self): random.seed(0) self.test_dir = tempfile.mkdtemp() - self.domain = project.Domain() + self.model_space = project.ModelSpace() - self.domain.add_param('A', 1.5, 1.0, 2.0, 0.1) - self.domain.add_param('B', 2.5, 2.0, 3.0, 0.1) - self.domain.add_param('C', 3.5, 3.0, 4.0, 0.1) + self.model_space.add_param('A', 1.5, 1.0, 2.0, 0.1) + self.model_space.add_param('B', 2.5, 2.0, 3.0, 0.1) + self.model_space.add_param('C', 3.5, 3.0, 4.0, 0.1) self.expected_names = ('_gen', '_model', '_particle', '_rfac', 'A', 'B', 'C') self.size = POP_SIZE @@ -115,7 +115,7 @@ class TestPopulation(unittest.TestCase): return r def test_setup(self): - self.pop.setup(self.size, self.domain) + self.pop.setup(self.size, self.model_space) self.assertEqual(self.pop.pos.dtype.names, self.expected_names) self.assertEqual(self.pop.pos.shape, (POP_SIZE,)) np.testing.assert_array_equal(np.arange(POP_SIZE), self.pop.pos['_particle']) @@ -132,7 +132,7 @@ class TestPopulation(unittest.TestCase): def test_setup_with_results(self): data_dir = os.path.dirname(os.path.abspath(__file__)) data_file = os.path.join(data_dir, "test_swarm.setup_with_results.1.dat") - self.pop.setup(self.size, self.domain, seed_file=data_file, recalc_seed=False) + self.pop.setup(self.size, self.model_space, seed_file=data_file, recalc_seed=False) self.assertEqual(self.pop.pos.dtype.names, self.expected_names) self.assertEqual(self.pop.pos.shape, (POP_SIZE,)) @@ -159,7 +159,7 @@ class TestPopulation(unittest.TestCase): def test_setup_with_results_recalc(self): data_dir = os.path.dirname(os.path.abspath(__file__)) data_file = os.path.join(data_dir, "test_swarm.setup_with_results.1.dat") - self.pop.setup(self.size, self.domain, seed_file=data_file, recalc_seed=True) + self.pop.setup(self.size, self.model_space, seed_file=data_file, recalc_seed=True) self.assertEqual(self.pop.pos.dtype.names, self.expected_names) self.assertEqual(self.pop.pos.shape, (POP_SIZE,)) @@ -184,12 +184,12 @@ class TestPopulation(unittest.TestCase): self.assertAlmostEqual(3.5, self.pop.pos['C'][0], 3) def test_setup_with_partial_results(self): - self.domain.add_param('D', 4.5, 4.0, 5.0, 0.1) + self.model_space.add_param('D', 4.5, 4.0, 5.0, 0.1) self.expected_names = ('_gen', '_model', '_particle', '_rfac', 'A', 'B', 'C', 'D') data_dir = os.path.dirname(os.path.abspath(__file__)) data_file = os.path.join(data_dir, "test_swarm.setup_with_results.1.dat") - self.pop.setup(self.size, self.domain, seed_file=data_file, recalc_seed=False) + self.pop.setup(self.size, self.model_space, seed_file=data_file, recalc_seed=False) self.assertEqual(self.pop.pos.dtype.names, self.expected_names) self.assertEqual(self.pop.pos.shape, (POP_SIZE,)) @@ -214,7 +214,7 @@ class TestPopulation(unittest.TestCase): self.assertAlmostEqual(3.5, self.pop.pos['C'][0], 3) def test_pos_gen(self): - self.pop.setup(self.size, self.domain) + self.pop.setup(self.size, self.model_space) for index, item in enumerate(self.pop.pos_gen()): self.assertIsInstance(item, dict) self.assertEqual(set(item.keys()), set(self.expected_names)) @@ -242,19 +242,19 @@ class TestPopulation(unittest.TestCase): np.testing.assert_array_equal(result, expected) def test_randomize(self): - self.pop.setup(self.size, self.domain) + self.pop.setup(self.size, self.model_space) self.pop.randomize() m = np.mean(self.pop.pos['A']) - self.assertGreaterEqual(m, self.domain.min['A']) - self.assertLessEqual(m, self.domain.max['A']) + self.assertGreaterEqual(m, self.model_space.min['A']) + self.assertLessEqual(m, self.model_space.max['A']) def test_seed(self): - self.pop.setup(self.size, self.domain) - self.pop.seed(self.domain.start) - self.assertAlmostEqual(self.pop.pos['A'][0], self.domain.start['A'], delta=0.001) + self.pop.setup(self.size, self.model_space) + self.pop.seed(self.model_space.start) + self.assertAlmostEqual(self.pop.pos['A'][0], self.model_space.start['A'], delta=0.001) def test_add_result(self): - self.pop.setup(self.size, self.domain) + self.pop.setup(self.size, self.model_space) i_sample = 1 i_result = 0 result = self.pop.pos[i_sample] @@ -264,12 +264,12 @@ class TestPopulation(unittest.TestCase): self.assertEqual(self.pop.best[i_sample], result) def test_save_population(self): - self.pop.setup(self.size, self.domain) + self.pop.setup(self.size, self.model_space) filename = os.path.join(self.test_dir, "test_save_population.pop") self.pop.save_population(filename) def test_save_results(self): - self.pop.setup(self.size, self.domain) + self.pop.setup(self.size, self.model_space) i_sample = 1 result = self.pop.pos[i_sample] self.pop.add_result(result, 1.0) @@ -277,17 +277,17 @@ class TestPopulation(unittest.TestCase): self.pop.save_results(filename) def test_save_array(self): - self.pop.setup(self.size, self.domain) + self.pop.setup(self.size, self.model_space) filename = os.path.join(self.test_dir, "test_save_array.pos") self.pop.save_array(filename, self.pop.pos) def test_load_array(self): n = 3 filename = os.path.join(self.test_dir, "test_load_array") - self.pop.setup(self.size, self.domain) + self.pop.setup(self.size, self.model_space) # expected array - dt_exp = self.pop.get_pop_dtype(self.domain.start) + dt_exp = self.pop.get_pop_dtype(self.model_space.start) a_exp = np.zeros((n,), dtype=dt_exp) a_exp['A'] = np.linspace(0, 1, n) a_exp['B'] = np.linspace(1, 2, n) @@ -395,7 +395,7 @@ class TestPopulation(unittest.TestCase): self.assertRaises(ValueError, population.Population.constrain_position, pos1, vel1, min1, max1, 'error') def test_patch_from_file(self): - self.pop.setup(self.size, self.domain) + self.pop.setup(self.size, self.model_space) data_dir = os.path.dirname(os.path.abspath(__file__)) data_file = os.path.join(data_dir, "test_swarm.setup_with_results.1.dat") @@ -411,7 +411,7 @@ class TestPopulation(unittest.TestCase): np.testing.assert_array_almost_equal(self.pop.pos_patch['C'], expected_pos['C']) def test_apply_patch(self): - self.pop.setup(self.size, self.domain) + self.pop.setup(self.size, self.model_space) expected_pos = self.pop.pos.copy() dt_test = [('A', 'f4'), ('_particle', 'i4'), ('_rfac', 'f4'), ('C', 'f4'), ('_model', 'i4')] patch_size = 3 @@ -436,14 +436,14 @@ class TestPopulation(unittest.TestCase): self.assert_pop_array_equal(self.pop.pos, expected_pos) def test_find_result(self): - self.domain.min['A'] = -0.1 - self.domain.max['A'] = 0.1 - self.domain.min['B'] = 0.0 - self.domain.max['B'] = 1000. - self.domain.min['C'] = 9. - self.domain.max['C'] = 9.001 + self.model_space.min['A'] = -0.1 + self.model_space.max['A'] = 0.1 + self.model_space.min['B'] = 0.0 + self.model_space.max['B'] = 1000. + self.model_space.min['C'] = 9. + self.model_space.max['C'] = 9.001 self.size = 100 - self.pop.setup(self.size, self.domain) + self.pop.setup(self.size, self.model_space) self.pop.results = self.pop.pos.copy() expected_index = 77 @@ -472,7 +472,7 @@ class TestPopulation(unittest.TestCase): check the different type conversions. the main work is in test_import_positions_array. """ - self.pop.setup(self.size, self.domain) + self.pop.setup(self.size, self.model_space) source_type = [('A', 'f4'), ('B', 'f4'), ('C', 'f4'), ('D', 'f4'), ('_model', 'i4'), ('_particle', 'i4'), ('_gen', 'i4'), ('_rfac', 'f4')] @@ -548,7 +548,7 @@ class TestPopulation(unittest.TestCase): - no range or duplicate checking. - missing parameter. """ - self.pop.setup(self.size, self.domain) + self.pop.setup(self.size, self.model_space) source_type = [('A', 'f4'), ('B', 'f4'), ('C', 'f4'), ('D', 'f4'), ('_model', 'i4'), ('_particle', 'i4'), ('_gen', 'i4'), ('_rfac', 'f4')] source = np.array([(1.0, 0.0, 0.0, 0.0, 0, 0, 0, 0.0), @@ -584,7 +584,7 @@ class TestPopulation(unittest.TestCase): - range checks. - de-duplication. """ - self.pop.setup(self.size, self.domain) + self.pop.setup(self.size, self.model_space) self.pop.position_constrain_mode = 'error' source_type = [('A', 'f8'), ('B', 'f8'), ('C', 'f8'), ('_model', 'i8'), ('_particle', 'i8'), ('_gen', 'i8'), ('_rfac', 'f8')] diff --git a/tests/test_project.py b/tests/test_project.py index 0d82908..f22d00a 100644 --- a/tests/test_project.py +++ b/tests/test_project.py @@ -106,16 +106,16 @@ class TestProject(unittest.TestCase): @mock.patch('pmsco.data.load_data') @mock.patch('pmsco.data.save_data') - def test_combine_symmetries(self, save_data_mock, load_data_mock): + def test_combine_domains(self, save_data_mock, load_data_mock): self.project.scans.append(project.Scan()) parent_task = dispatch.CalculationTask() parent_task.change_id(model=0, scan=0) - parent_task.model['wsym1'] = 0.5 + parent_task.model['wdom1'] = 0.5 child_tasks = [parent_task.copy()] * 2 for idx, task in enumerate(child_tasks): - task.change_id(sym=idx) + task.change_id(domain=idx) data1 = data.create_data(5, datatype='EI') data1['e'] = np.arange(5) @@ -126,7 +126,7 @@ class TestProject(unittest.TestCase): data3 = data1.copy() data3['i'] = (10. + 0.5 * 10.) / 1.5 - self.project.combine_symmetries(parent_task, child_tasks) + self.project.combine_domains(parent_task, child_tasks) save_data_mock.assert_called() args, kwargs = save_data_mock.call_args diff --git a/tests/test_swarm.py b/tests/test_swarm.py index ddd3fb6..c80966d 100644 --- a/tests/test_swarm.py +++ b/tests/test_swarm.py @@ -38,11 +38,11 @@ class TestSwarmPopulation(unittest.TestCase): def setUp(self): random.seed(0) self.test_dir = tempfile.mkdtemp() - self.domain = project.Domain() + self.model_space = project.ModelSpace() - self.domain.add_param('A', 1.5, 1.0, 2.0, 0.1) - self.domain.add_param('B', 2.5, 2.0, 3.0, 0.1) - self.domain.add_param('C', 3.5, 3.0, 4.0, 0.1) + self.model_space.add_param('A', 1.5, 1.0, 2.0, 0.1) + self.model_space.add_param('B', 2.5, 2.0, 3.0, 0.1) + self.model_space.add_param('C', 3.5, 3.0, 4.0, 0.1) self.expected_names = ('_gen', '_model', '_particle', '_rfac', 'A', 'B', 'C') self.size = POP_SIZE @@ -73,14 +73,14 @@ class TestSwarmPopulation(unittest.TestCase): return r def test_best_friend(self): - self.pop.setup(self.size, self.domain) + self.pop.setup(self.size, self.model_space) self.pop.best['_rfac'] = np.arange(self.size) friend = self.pop.best_friend(0) self.assertNotIsInstance(friend, np.ndarray) self.assertEqual(friend.dtype.names, self.expected_names) def test_advance_particle(self): - self.pop.setup(self.size, self.domain) + self.pop.setup(self.size, self.model_space) self.pop.pos['A'] = np.linspace(1.0, 2.0, POP_SIZE) self.pop.pos['B'] = np.linspace(2.0, 3.0, POP_SIZE) @@ -98,11 +98,11 @@ class TestSwarmPopulation(unittest.TestCase): for key in ['A','B','C']: for pos in self.pop.pos[key]: - self.assertGreaterEqual(pos, self.domain.min[key]) - self.assertLessEqual(pos, self.domain.max[key]) + self.assertGreaterEqual(pos, self.model_space.min[key]) + self.assertLessEqual(pos, self.model_space.max[key]) def test_is_converged(self): - self.pop.setup(self.size, self.domain) + self.pop.setup(self.size, self.model_space) self.assertFalse(self.pop.is_converged()) i_sample = 0 result = self.pop.pos[i_sample] @@ -116,7 +116,7 @@ class TestSwarmPopulation(unittest.TestCase): self.assertTrue(self.pop.is_converged()) def test_convergence_1(self): - self.pop.setup(self.size, self.domain) + self.pop.setup(self.size, self.model_space) self.pop.pos['A'] = np.linspace(1.0, 2.0, POP_SIZE) self.pop.pos['B'] = np.linspace(2.0, 3.0, POP_SIZE)