update public distribution

based on internal repository c9a2ac8 2019-01-03 16:04:57 +0100
tagged rev-master-2.0.0
2019-01-31 15:45:02 +01:00
parent bbd16d0f94
commit acea809e4e
92 changed files with 165828 additions and 143181 deletions


@ -38,13 +38,13 @@ PROJECT_NAME = "PEARL MSCO"
# could be handy for archiving the generated documentation or if some version
# control system is used.
PROJECT_NUMBER =
PROJECT_NUMBER = $(REVISION)
# Using the PROJECT_BRIEF tag one can provide an optional one line description
# for a project that appears at the top of each page and should give viewer a
# quick idea about the purpose of the project. Keep the description short.
PROJECT_BRIEF = "PEARL multiple scattering calculations and optimizations"
PROJECT_BRIEF = "PEARL multiple scattering calculation and optimization"
# With the PROJECT_LOGO tag one can specify a logo or an icon that is included
# in the documentation. The maximum height of the logo should not exceed 55
@ -228,7 +228,7 @@ TAB_SIZE = 4
# "Side Effects:". You can put \n's in the value part of an alias to insert
# newlines.
ALIASES =
ALIASES = "raise=@exception"
# This tag can be used to specify a number of word-keyword mappings (TCL only).
# A mapping has the form "name=value". For example adding "class=itcl::class"
@ -597,19 +597,19 @@ STRICT_PROTO_MATCHING = NO
# list. This list is created by putting \todo commands in the documentation.
# The default value is: YES.
GENERATE_TODOLIST = YES
GENERATE_TODOLIST = NO
# The GENERATE_TESTLIST tag can be used to enable (YES) or disable (NO) the test
# list. This list is created by putting \test commands in the documentation.
# The default value is: YES.
GENERATE_TESTLIST = YES
GENERATE_TESTLIST = NO
# The GENERATE_BUGLIST tag can be used to enable (YES) or disable (NO) the bug
# list. This list is created by putting \bug commands in the documentation.
# The default value is: YES.
GENERATE_BUGLIST = YES
GENERATE_BUGLIST = NO
# The GENERATE_DEPRECATEDLIST tag can be used to enable (YES) or disable (NO)
# the deprecated list. This list is created by putting \deprecated commands in
@ -761,9 +761,12 @@ WARN_LOGFILE =
INPUT = \
src/introduction.dox \
src/concepts.dox \
src/concepts-tasks.dox \
src/concepts-emitter.dox \
src/installation.dox \
src/execution.dox \
src/commandline.dox \
src/optimizers.dox \
../pmsco \
../projects \
../tests
@ -859,7 +862,7 @@ EXAMPLE_RECURSIVE = NO
# that contain images that are to be included in the documentation (see the
# \image command).
IMAGE_PATH =
IMAGE_PATH = src/images
# The INPUT_FILTER tag can be used to specify a program that doxygen should
# invoke to filter for each input file. Doxygen will invoke the filter program
@ -876,7 +879,7 @@ IMAGE_PATH =
# code is scanned, but not when the output code is generated. If lines are added
# or removed, the anchors will not be placed correctly.
INPUT_FILTER = /usr/bin/doxypy
INPUT_FILTER =
# The FILTER_PATTERNS tag can be used to specify filters on a per file pattern
# basis. Doxygen will compare the file name with each pattern and apply the
@ -885,7 +888,7 @@ INPUT_FILTER = /usr/bin/doxypy
# filters are used. If the FILTER_PATTERNS tag is empty or if none of the
# patterns match the file name, INPUT_FILTER is applied.
FILTER_PATTERNS =
FILTER_PATTERNS = *.py=/usr/bin/doxypy
# If the FILTER_SOURCE_FILES tag is set to YES, the input filter (if set using
# INPUT_FILTER) will also be used to filter the input files that are used for
@ -2328,7 +2331,7 @@ DIAFILE_DIRS =
# generate a warning when it encounters a \startuml command in this case and
# will not generate output for the diagram.
PLANTUML_JAR_PATH =
PLANTUML_JAR_PATH = $(PLANTUML_JAR_PATH)
# When using plantuml, the specified paths are searched for files specified by
# the !include statement in a plantuml block.


@ -2,6 +2,11 @@ SHELL=/bin/sh
# makefile for PMSCO documentation
#
# requirements
#
# 1) doxygen
# 2) /usr/bin/doxypy
# 3) PLANTUML_JAR_PATH environment variable must point to plantUML jar.
.SUFFIXES:
.SUFFIXES: .c .cpp .cxx .exe .f .h .i .o .py .pyf .so .html
@ -11,6 +16,9 @@ DOX=doxygen
DOXOPTS=
LATEX_DIR=latex
REVISION=$(shell git describe --always --tags --dirty --long || echo "unknown, "`date +"%F %T %z"`)
export REVISION
all: docs
docs: doxygen pdf
@ -22,5 +30,6 @@ pdf: doxygen
-$(MAKE) -C $(LATEX_DIR)
clean:
-rm -rf latex/*
-rm -rf html/*
-rm -r latex/*
-rm -r html/*


@ -11,14 +11,19 @@ it is recommended to adhere to the standard syntax described below.
The basic command line is as follows:
@code{.sh}
[mpiexec -np NPROCESSES] python path-to-project.py [common args] [project args]
[mpiexec -np NPROCESSES] python path/to/pmsco path/to/project.py [common args] [project args]
@endcode
Include the first portion between square brackets if you want to run parallel processes.
Specify the number of processes as the @c -np option.
@c path-to-project.py should be the path and name to your project module.
@c path/to/pmsco is the directory where <code>__main__.py</code> is located.
Do not include the extension <code>.py</code> or a trailing slash.
@c path/to/project.py should be the path and name of your project module.
Common args and project args are described below.
Note: In contrast to earlier versions, the project module is not executed directly any more.
Rather, it is loaded by the main pmsco module as a 'plug-in'.
\subsection sec_common_args Common Arguments
@ -30,7 +35,7 @@ The following table is ordered by importance.
| Option | Values | Description |
| --- | --- | --- |
| -h , --help | | Display a command line summary and exit. |
| -m , --mode | single (default), grid, swarm | Operation mode. |
| -m , --mode | single (default), grid, swarm, genetic | Operation mode. |
| -d, --data-dir | file system path | Directory path for experimental data files (if required by project). Default: current working directory. |
| -o, --output-file | file system path | Base path and/or name for intermediate and output files. Default: pmsco_data |
| -t, --time-limit | decimal number | Wall time limit in hours. The optimizers try to finish before the limit. Default: 24.0. |
@ -39,6 +44,8 @@ The following table is ordered by importance.
| --log-file | file system path | Name of the main log file. Under MPI, the rank of the process is inserted before the extension. Default: output-file + log, or pmsco.log. |
| --log-disable | | Disable logging. By default, logging is on. |
| --pop-size | integer | Population size (number of particles) in swarm optimization mode. The default value is the greater of 4 or two times the number of calculation processes. |
| --seed-file | file system path | Name of the population seed file. Population data of previous optimizations can be used to seed a new optimization. The file must have the same structure as the .pop or .dat files. See @ref pmsco.project.Project.seed_file. |
| --table-file | file system path | Name of the model table file in table scan mode. |
| -c, --code | edac (default) | Scattering code. At the moment, only edac is supported. |
@ -49,13 +56,14 @@ Multiple names can be specified and must be separated by spaces.
| Category | Description | Default Action |
| --- | --- | --- |
| all | shortcut to include all categories | |
| input | raw input files for calculator, including cluster and phase files in custom format | delete |
| output | raw output files from calculator | delete |
| phase | phase files in portable format for report | delete |
| cluster | cluster files in portable XYZ format for report | keep |
| debug | debug files | delete |
| model | output files in ETPAI format: complete simulation (a_-1_-1_-1_-1) | keep |
| scan | output files in ETPAI format: scan (a_b_-1_-1_-1) | delete |
| scan | output files in ETPAI format: scan (a_b_-1_-1_-1) | keep |
| symmetry | output files in ETPAI format: symmetry (a_b_c_-1_-1) | delete |
| emitter | output files in ETPAI format: emitter (a_b_c_d_-1) | delete |
| region | output files in ETPAI format: region (a_b_c_d_e) | delete |
@ -84,36 +92,11 @@ This way, the file names and photoelectron parameters are versioned with the cod
whereas command line arguments may easily get forgotten in the records.
\subsection sec_project_example Example Argument Handling
\subsection sec_project_example Argument Handling
An example for handling the command line in a project module can be found in the twoatom.py demo project.
The following code snippet shows how the common and project arguments are separated and handled.
@code{.py}
def main():
# have the pmsco module parse the common arguments.
args, unknown_args = pmsco.pmsco.parse_cli()
# pass any arguments not handled by pmsco
# to the project-defined parse_project_args function.
# unknown_args can be passed to argparse.ArgumentParser.parse_args().
if unknown_args:
project_args = parse_project_args(unknown_args)
else:
project_args = None
# create the project object
project = create_project()
# apply the common arguments on the project
pmsco.pmsco.set_common_args(project, args)
# apply the specific arguments on the project
set_project_args(project, project_args)
# run the project
pmsco.pmsco.run_project(project)
@endcode
To handle command line arguments in a project module,
the module must define a <code>parse_project_args</code> and a <code>set_project_args</code> function.
An example can be found in the twoatom.py demo project.
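The following sketch shows the expected shape of these two functions.
The @c --distance argument is purely hypothetical; the two functions are called by the main pmsco module during start-up.

@code{.py}
import argparse

def parse_project_args(_args):
    # parse the arguments that the main pmsco module did not recognize
    parser = argparse.ArgumentParser()
    parser.add_argument('--distance', type=float, default=2.5)
    return parser.parse_args(_args)

def set_project_args(project, project_args):
    # apply the parsed values to the project object
    if project_args is not None:
        project.distance = project_args.distance
@endcode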
\section sec_slurm Slurm Job Submission
@ -122,23 +105,24 @@ The command line of the Slurm job submission script for the Ra cluster at PSI is
This script is specific to the configuration of the Ra cluster but may be adapted to other Slurm-based queues.
@code{.sh}
qpmsco.sh [NOSUB] JOBNAME NODES TASKS_PER_NODE WALLTIME:HOURS PROJECT MODE [ARGS [ARGS [...]]]
qpmsco.sh [NOSUB] DESTDIR JOBNAME NODES TASKS_PER_NODE WALLTIME:HOURS PROJECT MODE [ARGS [ARGS [...]]]
@endcode
Here, the first few arguments are positional and their order must be strictly adhered to.
After the positional arguments, optional arguments of the PMSCO project command line can be added in arbitrary order.
If you execute the script without arguments, it displays a short summary.
The job script is written to @c ~/jobs/\$JOBNAME.
The job script is written to @c $DESTDIR/$JOBNAME which is also the destination of calculation output.
| Argument | Values | Description |
| --- | --- | --- |
| NOSUB (optional) | NOSUB or omitted | If NOSUB is present as the first argument, create the job script but do not submit it to the queue. Otherwise, submit the job script. |
| DESTDIR | file system path | Destination directory. Must exist. A sub-directory $JOBNAME is created. |
| JOBNAME | text | Name of job. Use only alphanumeric characters, no spaces. |
| NODES | integer | Number of computing nodes. (1 node = 24 or 32 processors). Do not specify more than 2. |
| TASKS_PER_NODE | 1...24, or 32 | Number of processes per node. 24 or 32 for full-node allocation. 1...23 for shared node allocation. |
| WALLTIME:HOURS | integer | Requested wall time. 1...24 for day partition, 24...192 for week partition, 1...192 for shared partition. This value is also passed on to PMSCO as the @c --time-limit argument. |
| PROJECT | file system path | Python module (file path) that declares the project and starts the calculation. |
| MODE | single, swarm, grid | PMSCO operation mode. This value is passed on to PMSCO as the @c --mode argument. |
| MODE | single, swarm, grid, genetic | PMSCO operation mode. This value is passed on to PMSCO as the @c --mode argument. |
| ARGS (optional) | | Any further arguments are passed on verbatim to PMSCO. You don't need to specify the mode and time limit here. |
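For example, the following call creates, but does not submit, a job script for a 12-hour, single-node job with 24 tasks per node (the paths are illustrative):

@code{.sh}
qpmsco.sh NOSUB ~/jobs myjob01 1 24 12 ~/pmsco/projects/twoatom/twoatom.py single
@endcode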
*/


@ -0,0 +1,185 @@
/*! @page pag_concepts_emitter Emitter configurations
\section sec_emitters Emitter configurations
\subsection sec_emit_intro Introduction
Since emitters contribute incoherently to the diffraction pattern,
it should make no difference how the emitters are grouped and calculated.
This fact can be used to distribute a calculation over multiple parallel processes
if each process calculates the diffraction pattern coming from one particular emitter atom.
In effect, some calculation codes are implemented for a single emitter per calculation.
With PMSCO, it is easy to distribute the emitters over parallel processes.
The project just declares the number of emitters and returns one specific cluster per emitter.
In the simplest case, this means that the emitter attribute of the cluster atoms is set differently,
while the atomic coordinates are the same for all clusters generated.
PMSCO takes care of dispatching the clusters to multiple calculation processes
depending on the number of allocated MPI processes
as well as summing up the resulting diffraction patterns.
In addition, the emitter framework supports tailoring clusters to a specific emitter configuration.
Suppose that the unit cell contains a large number of inequivalent emitters.
If all emitters had to be included in a single calculation,
the cluster would grow very large and the calculation would include many long scattering paths
that effectively did not contribute intensity to the final result.
Splitting a large cluster into small ones built locally around one emitter
can provide a significant performance gain in complex systems.
Note that the emitter framework does not require that an emitter _configuration_ contains only one emitter _atom_.
It is up to the project to define how many emitter configurations there are and what they encompass.
Normally, however, grouping multiple emitter atoms into one configuration should not be necessary.
To avoid confusion, it is recommended to declare exactly one emitter atom per configuration.
\subsection sec_emit_implement Implementation
There are several implementation routes with varying complexity.
Which route to take can depend on the complexity of the system and/or the programming skills of the user.
The following class diagram illustrates the classes and packages involved in cluster generation.
@startuml "class diagram for cluster generation"
package pmsco {
class Project {
cluster_generator
export_cluster()
}
abstract class ClusterGenerator {
project
{abstract} count_emitters()
{abstract} create_cluster()
}
class LegacyClusterGenerator {
project
count_emitters()
create_cluster()
}
}
package "user project" {
class UserClusterGenerator {
project
count_emitters()
create_cluster()
}
note bottom : for complex cluster
class UserProject {
count_emitters()
create_cluster()
}
note bottom : for simple cluster
}
Project <|-- UserProject
ClusterGenerator <|-- LegacyClusterGenerator
ClusterGenerator <|-- UserClusterGenerator
Project *-- ClusterGenerator
UserProject .> LegacyClusterGenerator
UserProject .> UserClusterGenerator
@enduml
In general, the cluster is generated by calls to the project's cluster_generator object.
This can be either a custom generator class derived from pmsco.cluster.ClusterGenerator
or the default pmsco.cluster.LegacyClusterGenerator which calls the UserProject.
For simple clusters, it may be sufficient to implement the cluster directly in the user project class
(UserProject in the diagram).
For more complex systems, it is recommended to implement a custom cluster generator class
(UserClusterGenerator).
\subsubsection sec_emit_implement_legacy Static cluster implemented in project methods
This is the simplest route, as it requires implementing only one or two methods of the user project class.
It can be used for single-emitter and multi-emitter problems.
This implementation is active while a pmsco.cluster.LegacyClusterGenerator
is assigned to the project's cluster_generator attribute.
1. Implement a count_emitters method in your project class
if the project uses more than one emitter configuration.
It must have the same method contract as pmsco.cluster.ClusterGenerator.count_emitters.
Specifically, it must return the number of emitter configurations of a given model, scan and symmetry.
If there is only one configuration, the method does not need to be implemented.
2. Implement a create_cluster method in your project class.
It must have the same method contract as pmsco.cluster.ClusterGenerator.create_cluster.
Specifically, it must return a cluster.Cluster object for the given model, scan, symmetry and emitter configuration.
The emitter atoms must be marked according to the emitter configuration specified by the index argument.
Note that, depending on the index.emit argument, either all emitter atoms
or only those of the corresponding emitter configuration must be marked.
3. (Optionally) override the pmsco.project.Project.combine_emitters method
if the emitters should be added with non-uniform weights.
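The following sketch illustrates steps 1 and 2 in the user project class.
The argument names follow the contract described above; the cluster-building details and the number of configurations are placeholders.

@code{.py}
import pmsco.cluster
import pmsco.project

class UserProject(pmsco.project.Project):
    def count_emitters(self, model, index):
        # number of emitter configurations of the given model, scan and symmetry
        return 2

    def create_cluster(self, model, index):
        clu = pmsco.cluster.Cluster()
        # ... add atoms according to the model parameters ...
        # ... mark the emitter atom(s) of the configuration selected by index.emit ...
        return clu
@endcode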
Although it's possible to produce emitter-dependent clusters using this approach,
this is usually not recommended.
Rather, the generator approach described below should be followed in this case.
\subsubsection sec_emit_implement_generator Static cluster implemented by generator class
The preferred way of creating clusters is to implement a _generator_ class
because it is the most scalable way from simple to complex systems.
In addition, one cluster generator class can be quickly exchanged for another
if there are multiple possibilities.
1. Implement a cluster generator class which inherits from pmsco.cluster.ClusterGenerator
in your project module.
2. Implement the create_cluster and count_emitters methods of the generator.
The method contracts are the same as the ones described in the previous paragraph,
just in the context of a separate class.
3. Initialize an instance of the generator and assign it to the project.cluster_generator attribute
in the initialization of your project.
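A minimal skeleton of this route is sketched below.
The generator's constructor is assumed to take the project as its argument (cf. the project attribute in the class diagram above); the method bodies are placeholders.

@code{.py}
import pmsco.cluster
import pmsco.project

class UserClusterGenerator(pmsco.cluster.ClusterGenerator):
    def count_emitters(self, model, index):
        # same contract as described in the previous paragraph
        return 1

    def create_cluster(self, model, index):
        clu = pmsco.cluster.Cluster()
        # ... build the cluster for emitter configuration index.emit ...
        return clu

class UserProject(pmsco.project.Project):
    def __init__(self):
        super(UserProject, self).__init__()
        # step 3: attach the generator instance to the project
        self.cluster_generator = UserClusterGenerator(self)
@endcode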
\subsubsection sec_emit_implement_local Local clusters implemented by generator class
The basic method contract outlined in the previous paragraph is equally applicable to the case
where a local cluster is generated for each emitter configuration.
Again, the generator class with the two methods (count_emitters and create_cluster) is the minimum requirement.
However, for ease of code maintenance and/or for improved performance of large clusters,
some internal structure may be helpful.
Suppose that the system consists of a large supercell containing many emitters
and that a small cluster shall be built for each emitter configuration.
During the calculations, the generator will receive several calls to the count_emitters and create_cluster methods.
Every time the model and index are the same, the functions must return the same result.
Thus, most importantly, the implementation must make sure that the results are fully deterministic.
Second, depending on the complexity, it could be more efficient to cache a cluster for later use.
One way to reduce the complexity is to introduce a _master cluster_
from which the emitter configurations and individual clusters are derived.
1. Implement a master_cluster method with the same arguments and result types as create_cluster.
The method returns a full cluster of the supercell and its neighbouring cells.
All inequivalent emitters are marked (which determines the number of emitter configurations).
2. Decorate the master_cluster with pmsco.dispatch.CachedCalculationMethod.
This pre-defined decorator transparently caches the cluster
so that subsequent calls with the same arguments do not re-create the cluster but return the cached one.
3. The count_emitters method can simply return the emitter count of the master cluster.
4. The create_cluster method calls master_cluster() and extracts the region
corresponding to the requested emitter configuration.
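The pattern is sketched below.
The decorator and the method names follow the description above; the code that builds the master cluster, counts the marked emitters and extracts the local cluster is project-specific and only indicated by placeholder comments.

@code{.py}
import pmsco.cluster
from pmsco.dispatch import CachedCalculationMethod

class SupercellClusterGenerator(pmsco.cluster.ClusterGenerator):
    @CachedCalculationMethod
    def master_cluster(self, model, index):
        clu = pmsco.cluster.Cluster()
        # ... build the supercell and its neighbouring cells,
        #     then mark all inequivalent emitters ...
        return clu

    def count_emitters(self, model, index):
        master = self.master_cluster(model, index)
        # ... return the number of marked emitters of the master cluster ...
        return 1

    def create_cluster(self, model, index):
        master = self.master_cluster(model, index)
        clu = pmsco.cluster.Cluster()
        # ... copy the atoms around the emitter of configuration index.emit
        #     from master into clu ...
        return clu
@endcode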
\subsection sec_emit_report Reporting
The pmsco.project.Project class implements a method that saves a cluster to two XYZ files,
one containing the coordinates of all atoms
and one containing only the coordinates of the emitters.
The method is called for each cluster that is passed to the calculator, i.e., each emitter index.
You may override the method in your project to alter the reporting.
*/


@ -0,0 +1,3 @@
/*! @page pag_concepts_model Model
*/


@ -0,0 +1,3 @@
/*! @page pag_concepts_region Region
*/


@ -0,0 +1,31 @@
/*! @page pag_concepts_scan Scans
\section sec_scanning Scanning
PMSCO with EDAC currently supports the following scan axes.
- kinetic energy E
- polar angle theta T
- azimuthal angle phi P
- analyser angle alpha A
The following combinations of these scan axes are allowed (see pmsco.data.SCANTYPES).
- E
- E-T
- E-A
- T-P (hemispherical or hologram scan)
@attention The T and A axes cannot be combined.
If a scan of one of them is specified, the other is assumed to be fixed at zero!
This assumption may change in the future,
so it is best to explicitly set the fixed angle to zero in the scan file.
@remark According to the measurement geometry at PEARL,
alpha scans are implemented in EDAC as theta scans at phi = 90 in fixed cluster mode.
The switch to fixed cluster mode is made internally by PMSCO;
no change of angles or other parameters is necessary in the scan or project files
besides filling the alpha column instead of the theta column.
*/


@ -0,0 +1,32 @@
/*! @page pag_concepts_symmetry Symmetry
\section sec_symmetry Symmetry and Domain Averaging
A _symmetry_ under PMSCO is a discrete variant of a set of calculation parameters (including the atomic cluster)
that is derived from the same set of model parameters
and that contributes incoherently to the measured diffraction pattern.
A symmetry may be represented by a special symmetry parameter which is not subject to optimization.
For instance, a real sample may have additional rotational domains that are not present in the cluster,
increasing the symmetry from three-fold to six-fold.
Or, an adsorbate may be present in a number of different lateral configurations on the substrate.
In the first case, it may be sufficient to fold calculated data in the proper way to generate the same symmetry as in the measurement.
In the latter case, it may be necessary to execute a scattering calculation for each possible orientation or a representative number of possible orientations.
PMSCO provides the basic framework to spawn multiple calculations according to the number of symmetries (cf. \ref sec_tasks).
The actual data reduction from multiple symmetries to one measurement needs to be implemented on the project level.
This section explains the necessary steps.
1. Your project needs to populate the pmsco.project.Project.symmetries list.
For each symmetry, add a dictionary of symmetry parameters, e.g. <code>{'angle_azi': 15.0}</code>.
There must be at least one symmetry in a project, otherwise no calculation is executed.
2. The project may apply the symmetry of a task to the cluster and parameter file if necessary.
The pmsco.project.Project.create_cluster and pmsco.project.Project.create_params methods receive the index of the particular symmetry in addition to the model parameters.
3. The project combines the results of the calculations for the various symmetries into one dataset that can be compared to the measurement.
The default method implemented in pmsco.project.Project just adds up all calculations with equal weight.
If you need more control, you need to override the pmsco.project.Project.combine_symmetries method and implement your own algorithm.
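As a minimal illustration of step 1, the symmetries list can be populated in the project's initialization (the parameter name is just an example):

@code{.py}
import pmsco.project

class UserProject(pmsco.project.Project):
    def __init__(self):
        super(UserProject, self).__init__()
        # one dictionary of symmetry parameters per domain
        self.symmetries.append({'angle_azi': 0.0})
        self.symmetries.append({'angle_azi': 15.0})
@endcode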
*/

docs/src/concepts-tasks.dox (new file)

@ -0,0 +1,306 @@
/*! @page pag_concepts_tasks Task concept
\section sec_tasks Calculation tasks
A _calculation task_ defines a concrete set of model parameters, atomic coordinates, emitter configuration,
experimental reference and meta-data (such as file names)
that completely defines how to produce the input data for the scattering program (the _calculator_).
For each task, the calculator is executed once and produces one result dataset.
In a typical optimization project, however, the calculator is executed multiple times,
for reasons mandated by the project as well as for efficient calculation in a multi-process environment:
1. The calculation must be repeated under variation of parameters.
A concrete set of parameters is called @ref sec_task_model.
2. The sample was measured multiple times or under different conditions (initial states, photon energy, emission angle).
Each contiguous measured dataset is called a @ref sec_task_scan.
3. The measurement averages over multiple inequivalent domains, cf. @ref sec_task_symmetry.
4. The measurement includes multiple geometrically inequivalent emitters, cf. @ref sec_task_emitter.
5. The calculation should be distributed over multiple processes that run in parallel to reduce the wall time, cf. @ref sec_task_region.
In PMSCO, these aspects are modelled as attributes of a calculation task
as shown schematically in the following diagram.
@startuml "attributes of a calculation task"
class CalculationTask {
model
scan
symmetry
emitter
region
..
files
}
class Model {
index
..
dlat
dAS
dS1S2
V0
Zsurf
Texp
rmax
}
class Scan {
index
..
filename
mode
initial_state
energies
thetas
phis
alphas
}
class Symmetry {
index
..
rotation
registry
}
class Emitter {
index
}
class Region {
index
..
range
}
CalculationTask *-- Model
CalculationTask *-- Scan
CalculationTask *-- Symmetry
CalculationTask *-- Emitter
CalculationTask *-- Region
class Project {
scans
symmetries
model_handler
cluster_generator
}
class ClusterGenerator {
count_emitters()
create_cluster()
}
class ModelHandler {
create_tasks()
add_result()
}
Model ..> ModelHandler
Scan ..> Project
Symmetry ..> Project
Emitter ..> ClusterGenerator
Region ..> Project
Project *-left- ModelHandler
Project *- ClusterGenerator
hide empty members
@enduml
Although the attributes may have quite different types (as detailed below),
each instance is also given a unique (per attribute) integer index,
where -1 means that the attribute is undefined.
The indices of the five attributes together (pmsco.dispatch.CalcID tuple)
serve internally to identify a task and the data belonging to it.
The identifier appears, for instance, in input and output file names.
Normally, data files are deleted after the calculation, and only a few top-level files are kept
(can be overridden at the command line or in the project code).
At the top level, only the model ID is set, the other ones are undefined (-1).
\subsection sec_task_model Model
The _model_ attribute is a dictionary of continuously variable parameters of the system such as lattice constants, relaxation constants, rotation angles, etc.
It may also define non-structural or non-physical parameters such as temperature, inner potential or cluster radius.
The dictionary contains key-value pairs where the keys are up to the user project (the figure shows some examples).
The values are floating-point numbers that are chosen by the model handler within the domain specified by the user project.
Models are generated by the chosen optimizer according to a particular algorithm or, in single mode, directly by the project.
Each specific instance of model parameters is given a unique index that identifies the related input and output files.
Model parameters are reported with the corresponding R-factors during the optimization process.
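For illustration, a concrete model is essentially a plain dictionary of floating-point values (the keys below are taken from the figure, the values are arbitrary examples):

@code{.py}
model = {'dlat': 3.92, 'dAS': 2.05, 'dS1S2': 2.39,
         'V0': 11.0, 'Zsurf': 1.2, 'Texp': 300.0, 'rmax': 8.0}
@endcode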
\subsection sec_task_scan Scan
The _scan_ attribute is an index into the list of scans defined by the user project.
Each scan refers to one experimental data file and, thus, defines the initial and final states of the photoelectron.
PMSCO runs a separate calculation for each scan file and compares the combined results to the experimental data.
This is sometimes called a _global fit_.
\subsection sec_task_symmetry Symmetry
A _symmetry_ is a discrete variant of a set of calculation parameters (including the atomic cluster)
that is independent of the _model_ and contributes incoherently to the measured diffraction pattern.
For instance, for a system that includes two inequivalent structural domains,
two separate clusters have to be generated and calculated for each model.
The symmetry parameter is not subject to optimization.
However, if the branching ratio is unknown a priori, a model parameter can be introduced
to control the relative contribution of a particular symmetry to the diffraction pattern.
In that case, the @ref pmsco.project.Project.combine_symmetries method must be overridden.
A symmetry is identified by its index which is an index into the project's symmetries table (pmsco.project.Project.symmetries).
It is up to the user project to give a physical description of the symmetry, e.g. a rotation angle,
by assigning a meaningful value (e.g. a dictionary with key-value pairs) to the symmetries table.
The cluster generator can then read the value from the table rather than from constants in the code.
The figure shows two examples of symmetry parameters.
The corresponding symmetry table could be set up like this:
@code{.py}
project.symmetries.append({'rotation': 0.0, 'registry': 0.0})
project.symmetries.append({'rotation': 30.0, 'registry': 0.0})
@endcode
\subsection sec_task_emitter Emitter
The _emitter_ component of the calculation task selects a specific emitter configuration of the cluster generator.
This is merely an index whose interpretation is up to the cluster generator.
The default emitter handler enumerates the emitter index from 1 to the emitter count reported by the cluster generator.
The emitter count and list of emitters may depend on model, scan and symmetry.
The cluster generator can tailor a cluster to the given model, scan, symmetry and emitter index.
For example, in a large unit cell with many inequivalent emitters,
the generator might return a small sub-cluster around the actual emitter for better calculation performance
since the distant atoms of the unit cell do not contribute to the diffraction pattern.
Emitter branching must be requested specifically by using a particular pattern in the code.
By default, it is disabled, which allows the cluster code to be written in a slightly easier way.
\subsection sec_task_region Region
The _region_ handler may split a scan region into several smaller chunks
so that the tasks can be distributed to multiple processes.
Chunking by energy regions is enabled automatically if the project contains an energy scan of at least 10 points
and the project is run in multiple processes.
It can be disabled by the user project.
\section sec_task_handler Task handlers
The previous section described the five important attributes of a calculation task.
These attributes span a five-dimensional index space
where each point maps to one task and, consequently, one calculation and one result dataset.
To populate the index space, however, calculation tasks are more adequately arranged in a tree-like hierarchy with five levels.
The code that defines attributes and processes results can then be separated into _handlers_.
Each level calls for a particular functional contract of the handler.
According to object-oriented principles the contracts at the five levels are defined by abstract base classes
which can be sub-classed for more specific behaviour.
For instance, the class of the model handler is chosen based on the execution mode (single, grid, swarm, etc.).
Though it is possible for a project to define its own handlers,
the PMSCO core declares handlers that should cover most calculation scenarios.
The following diagram shows the tree of calculation tasks and how handlers act on the task objects to populate the task attributes.
At the top of the tree, an empty task object (all attributes undefined) is fed into the model level handler which takes care of the model attribute.
The model handler generates a number of sub-tasks, one for each set of model parameters.
Each of these (incompletely defined) tasks is then passed to the next handler, and so on.
@startuml "calculation task hierarchy and task handler stack"
object "Root: CalculationTask" as Root {
index = (-1,-1,-1,-1,-1)
}
note right: all attributes undefined
object "Model: CalculationTask" as Model {
index = (i,-1,-1,-1,-1)
model
}
note right: model is defined\nother attributes undefined
object ModelHandler
object "Scan: CalculationTask" as Scan {
index = (i,j,-1,-1,-1)
model
scan
}
object ScanHandler
object "Sym: CalculationTask" as Sym {
index = (i,j,k,-1,-1)
model
scan
symmetry
}
object "SymmetryHandler" as SymHandler
object "Emitter: CalculationTask" as Emitter {
index = (i,j,k,l,-1)
model
scan
symmetry
emitter
}
object EmitterHandler
object "Region: CalculationTask" as Region {
index = (i,j,k,l,m)
model
scan
symmetry
emitter
region
}
note right: all attributes well-defined
object RegionHandler
Root "1" o.. "1..*" Model
Model "1" o.. "1..*" Scan
Scan "1" o.. "1..*" Sym
Sym "1" o.. "1..*" Emitter
Emitter "1" o.. "1..*" Region
(Root, Model) .. ModelHandler
(Model, Scan) .. ScanHandler
(Scan, Sym) .. SymHandler
(Sym, Emitter) .. EmitterHandler
(Emitter, Region) .. RegionHandler
@enduml
At the end of the stack, the tasks are fully specified and are passed to the calculation queue.
They are dispatched to the available processes of the MPI environment in which PMSCO was started,
which allows calculations to be run in parallel.
Only at this point, after the model has been broken down into multiple fully specified tasks,
are the cluster and input files generated and the calculation program started.
At the end of a calculation, the output files are associated with their original task objects,
and the tasks are passed back through the task handler stack.
In this phase, each level joins the datasets from the sub-tasks to the data requested by the parent task.
For example, at the lowest level, one result file is present for each region.
The region handler gathers all files that correspond to the same parent task
(i.e. have the same emitter, symmetry, scan and model attributes),
joins them to one file which includes all regions,
links the file to the parent task and passes the result to the next higher level.
On the top level, the model handler compares the result to the experimental data.
Depending on the operation mode, it refines the model parameters and issues new tasks by passing them down the stack.
When the optimization is finished (according to a set of defined criteria),
the model handler returns the root task to the caller, which causes PMSCO to exit.
*/


@ -1,153 +1,85 @@
/*! @page pag_concepts Design Concepts
\section sec_tasks Tasks
/*! @page pag_concepts Design
In an optimization project, a number of optimizable, high-level parameters generated by the optimization algorithm
must be mapped to the input parameters and atomic coordinates before the calculation program is executed.
Possibly, the calculation program is executed multiple times for inequivalent domains, emitters or scan geometries.
After the calculation, the output is collected, compared to the experimental data, and the model is refined.
In PMSCO, the optimization is broken down into a set of _tasks_ and assigned to a stack of task _handlers_ according to the following figure.
Each invocation of the scattering program (EDAC) runs a specific task,
i.e. a calculation for a set of specific parameters, a fully-qualified cluster of atoms, and a specific angle and/or energy scan.
\section sec_components Components
\dotfile tasks.dot "PMSCO task stack"
The code for a PMSCO job consists of the following components.
At the root, the _model handler_ proposes models that need to be calculated according to the operation mode specified at the command line.
A _model_ is the minimum set of variable parameters in the context of a custom project.
Other parameters that will not vary under optimization are set directly by the project code.
The model handler may generate models based on a fixed scheme, e.g. on a grid, or based on R-factors of previous results.
@startuml "top-level components of scattering and optimization code"
For each model, one task is passed to the task handling chain, starting with the scan handler.
The _scan handler_ generates sub-tasks for each experimental scan dataset.
This way, the model can be optimized for multiple experimental scans in the same run (see Sec. \ref sec_scanning).
skinparam componentStyle uml2
The _symmetry handler_ generates sub-tasks based on the number of symmetries contained in the experimental data (see Sec. \ref sec_symmetry).
For instance, for a system that includes two inequivalent structural domains, two separate calculations have to be run for each model.
The symmetry handler is implemented on the project level and may be customized for a specific system.
component "project" as project
component "PMSCO" as pmsco
component "scattering code\n(calculator)" as calculator
The _emitter handler_ generates a sub-task for each inequivalent emitter atom
so that the tasks can be distributed to multiple processes (see Sec. \ref sec_emitters).
In a single-process environment, all emitters are calculated in one task.
interface "command line" as cli
interface "input files" as input
interface "output files" as output
interface "experimental data" as data
interface "results" as results
The _region handler_ may split a scan region into several smaller chunks
so that the tasks can be distributed to multiple processes.
With EDAC, only energy scans can benefit from chunking
since it always calculates the full angular distribution.
This layer has to be enabled specifically in the project module.
It is disabled by default.
data -> project
project ..> pmsco
pmsco ..> calculator
cli --> project
input -> calculator
calculator -> output
pmsco -> results
At the end of the stack, the tasks are fully specified and are passed to the calculation queue.
They are dispatched to the available processes of the MPI environment in which PMSCO was started,
which allows calculations to be run in parallel.
Only now that the model is broken down into multiple tasks,
the cluster and input files are generated, and the calculation program is started.
At the end of a calculation, the output is passed back through the task handler stack.
In this phase, each level gathers the datasets from the sub-tasks to the data requested by the parent task
and passes the result to the next higher level.
On the top level, the calculation is compared to the experimental data.
Depending on the operation mode, the model parameters are refined, and new tasks issued.
If the optimization is finished according to a set of defined criteria, PMSCO exits.
As an implementation detail, each task is given a unique _identifier_ consisting of five integer numbers
which correspond to the five levels model, scan, symmetry, emitter and region.
The identifier appears in the file names in the communication with the scattering program.
Normally, the data files are deleted after the calculation, and only a few top-level files are kept
(can be overridden at the command line or in the project code).
At the top level, only the model ID is set, the other ones are undefined (-1).
@enduml
\section sec_symmetry Symmetry and Domain Averaging
The _project_ consists of program code, system and experimental parameters
that are specific to a particular experiment and calculation job.
The project code reads experimental data, defines the parameter dictionary of the model,
and contains code to generate the cluster, parameter and phase files for the scattering code.
The project is also the main entry point of process execution.
A _symmetry_ under PMSCO is a discrete variant of a set of calculation parameters (including the atomic cluster)
that is derived from the same set of model parameters
and that contributes incoherently to the measured diffraction pattern.
A symmetry may be represented by a special symmetry parameter which is not subject to optimization.
The _scattering code_ on the other hand is a static calculation engine
which accepts detailed input files
(parameters, atomic coordinates, emitter specification, scattering phases)
and outputs an intensity distribution of photoelectrons versus energy and/or angle.
For instance, a real sample may have additional rotational domains that are not present in the cluster,
increasing the symmetry from three-fold to six-fold.
Or, an adsorbate may be present in a number of different lateral configurations on the substrate.
In the first case, it may be sufficient to fold calculated data in the proper way to generate the same symmetry as in the measurement.
In the latter case, it may be necessary to execute a scattering calculation for each possible orientation or a representative number of possible orientations.
PMSCO provides the basic framework to spawn multiple calculations according to the number of symmetries (cf. \ref sec_tasks).
The actual data reduction from multiple symmetries to one measurement needs to be implemented on the project level.
This section explains the necessary steps.
1. Your project needs to populate the pmsco.project.Project.symmetries list.
For each symmetry, add a dictionary of symmetry parameters, e.g. <code>{'angle_azi': 15.0}</code>.
There must be at least one symmetry in a project, otherwise no calculation is executed.
2. The project may apply the symmetry of a task to the cluster and parameter file if necessary.
The pmsco.project.Project.create_cluster and pmsco.project.Project.create_params methods receive the index of the particular symmetry in addition to the model parameters.
3. The project combines the results of the calculations for the various symmetries into one dataset that can be compared to the measurement.
The default method implemented in pmsco.project.Project just adds up all calculations with equal weight.
If you need more control, you need to override the pmsco.project.Project.combine_symmetries method and implement your own algorithm.
The _PMSCO core_ interfaces between the project and the calculator.
It carries out the structural optimization and manages the calculation tasks.
It generates and sends input files to the calculator and reads back the output.
\section sec_scanning Scanning
\section sec_control_flow Control flow
PMSCO with EDAC currently supports the following scan axes.
The basic control flow of an optimization job is depicted schematically in the following figure.
- kinetic energy E
- polar angle theta T
- azimuthal angle phi P
- analyser angle alpha A
@startuml "top-level activity diagram"
The following combinations of these scan axes are allowed (see pmsco.data.SCANTYPES).
start
:initialize;
:import experimental data;
repeat
:define tasks;
fork
:calculate\ntask 1;
fork again
:calculate\ntask N;
end fork
:evaluate results;
repeat while
-> [finished];
:report results;
- E
- E-T
- E-A
- T-P (hemispherical or hologram scan)
stop
@attention The T and A axes cannot be combined.
If a scan of one of them is specified, the other is assumed to be fixed at zero!
This assumption may change in the future,
so it is best to explicitly set the fixed angle to zero in the scan file.
@enduml
@remark According to the measurement geometry at PEARL,
alpha scans are implemented in EDAC as theta scans at phi = 90 in fixed cluster mode.
The switch to fixed cluster mode is made by PMSCO internally,
no change of angles or other parameters is necessary in the scan or project files
besides filling the alpha instead of the theta column.
After importing experimental data and setting up the model dictionary and job parameters,
the calculation tasks are defined depending on the execution mode and system setup.
Each task consists of a specific set of model, experimental and calculation parameters
that describe an independent calculation step,
while several steps may be required to produce a dataset that can be compared to the experimental data.
The idea is that tasks can be defined quickly
and that the time-consuming operations are dispatched to slave processes which can run in parallel.
\section sec_emitters Emitter Configurations
Since emitters contribute incoherently to the diffraction pattern,
it should make no difference how the emitters are grouped and calculated.
EDAC allows specifying multiple emitters in one calculation.
However, running EDAC multiple times for single-emitter configurations and simply summing up the results
gives the same final diffraction pattern with no significant difference in CPU time.
It is, thus, easy to distribute the emitters over parallel processes in a multi-process environment.
PMSCO can handle this transparently with a minimal effort.
Within the same framework, PMSCO also supports that clusters are tailored to a specific emitter configuration.
Suppose that the unit cell contains a large number of inequivalent emitters.
If all emitters had to be included in a single calculation,
the cluster would grow very large and the calculation would take a long time
because it would include many long scattering paths
that effectively do not contribute intensity to the final result.
Using single-emitters, a cluster can be built locally around the emitter and kept to a reasonable size.
Even when using this feature, PMSCO does not require that each configuration contains only one emitter.
The term _emitter_ effectively means _emitter configuration_.
A configuration can include multiple emitters which will not be broken up further.
It is up to the project, what is included in a particular configuration.
To enable emitter handling,
1. override the count_emitters method of your cluster generator
and return the number of emitter configurations of a given model, scan and symmetry.
2. handle the emitter index in your create_cluster method.
3. (optionally) override the pmsco.project.Project.combine_emitters method
if the emitters should not be added with equal weights.
For implementation details see the respective method descriptions.
As soon as all necessary results are available they are combined into one dataset and compared to the experimental data.
Depending on the execution mode, the process of task definition and calculation repeats until the model has converged
or the calculations are stopped for another reason.
*/


@ -14,26 +14,29 @@ Run PMSCO from the command prompt:
@code{.sh}
cd work-dir
python project-dir/project.py [pmsco-arguments] [project-arguments]
python pmsco-dir project-dir/project.py [pmsco-arguments] [project-arguments]
@endcode
where <code>work-dir</code> is the destination directory for output files,
<code>pmsco-dir</code> is the directory containing the <code>__main__.py</code> file,
<code>project.py</code> is the specific project module,
and <code>project-dir</code> is the directory where the project file is located.
PMSCO is run in one process which handles all calculations sequentially.
The command line arguments are usually divided into common arguments interpreted by the main pmsco code (pmsco.py),
The command line arguments are divided into common arguments interpreted by the main pmsco code (pmsco.py),
and project-specific arguments interpreted by the project module.
However, it is ultimately up to the project module how the command line is interpreted.
Example command line for a single EDAC calculation of the two-atom project:
@code{.sh}
cd work/twoatom
python pmsco/projects/twoatom/twoatom.py -s ea -o twoatom-demo -m single
python ../../pmsco ../../projects/twoatom/twoatom.py -s ea -o twoatom-demo -m single
@endcode
The project file <code>twoatom.py</code> takes the lead of the project execution.
Usually, it contains only project-specific code and delegates common tasks to the main pmsco code.
This command line executes the main pmsco module <code>pmsco.py</code>.
The main module loads the project file <code>twoatom.py</code> as a plug-in
and starts processing the common arguments.
The <code>twoatom.py</code> module contains only project-specific code
with several defined entry-points called from the main module.
In the command line above, the <code>-o twoatom-demo</code> and <code>-m single</code> arguments
are interpreted by the pmsco module.
@ -61,7 +64,7 @@ For optimum performance, the number of processes should not exceed the number of
To start a two-hour optimization job with multiple processes on a quad-core workstation with hyperthreading:
@code{.sh}
cd work/my_project
mpiexec -np 8 project-dir/project.py -o my_job_0001 -t 2 -m swarm
mpiexec -np 8 pmsco-dir/pmsco project-dir/project.py -o my_job_0001 -t 2 -m swarm
@endcode
@ -84,4 +87,4 @@ bin/qpmsco.ra.sh my_job_0001 1 8 2 projects/my_project/project.py swarm
Be sure to consider the resource allocation policy of the cluster
before you decide on the number of processes.
Requesting less resources will prolong the run time but might increase the scheduling priority.
*/
*/


@ -3,60 +3,71 @@
\subsection sec_general General Remarks
The PMSCO code is maintained under git.
The PMSCO code is maintained under [Git](https://git-scm.com/).
The central repository for PSI-internal projects is at https://git.psi.ch/pearl/pmsco,
the public repository at https://gitlab.psi.ch/pearl/pmsco.
For their own developments, users should clone the repository.
Changes to common code should be submitted via pull requests.
\subsection sec_requirements Requirements
The program code of PMSCO and its external programs is written in Python, C++ and Fortran.
The code will run in any recent Linux environment on a workstation or in a virtual machine.
Scientific Linux, CentOS7, [Ubuntu](https://www.ubuntu.com/)
and [Lubuntu](http://lubuntu.net/) (recommended for virtual machine) have been tested.
For optimization jobs, a workstation with at least 4 processor cores
or cluster with 20-50 available processor cores is recommended.
The program requires about 2 GB of RAM per process.
The recommended IDE is [PyCharm (community edition)](https://www.jetbrains.com/pycharm).
The documentation in [Doxygen](http://www.stack.nl/~dimitri/doxygen/index.html) format is part of the source code.
The Doxygen compiler can generate separate documentation in HTML or LaTeX.
The MSC and EDAC codes compile with the GNU Fortran and C++ compilers on Linux.
Other compilers may work but have not been tested.
The code will run in any recent Linux environment on a workstation or in a virtual machine.
Scientific Linux, CentOS7, [Ubuntu](https://www.ubuntu.com/)
and [Lubuntu](http://lubuntu.net/) (recommended for virtual machine) have been tested.
For optimization jobs, a high-performance cluster with 20-50 available processor cores is recommended.
The code requires about 2 GB of RAM per process.
Please note that it may be important that the code remains compatible with earlier compiler and library versions.
Newer compilers or the latest versions of the libraries contain features that will break the compatibility.
The code can be used with newer versions as long as they are backward compatible.
\subsection sec_requirements Requirements
Please note that in some environments (particularly shared high-performance machines)
it may be important to choose specific compiler and library versions.
In order to maintain backward compatibility with some of these older machines,
code that requires new versions of compilers and libraries should be introduced carefully.
The code depends on the following libraries:
- GCC 4.8
- OpenMPI 1.10
- GCC >= 4.8
- OpenMPI >= 1.10
- F2PY
- F2C
- SWIG
- Python 2.7 (incompatible with Python 3.0)
- Numpy 1.11 (incompatible with Numpy 1.13 and later)
- MPI4PY (from PyPI)
- BLAS
- LAPACK
- periodictable
- Python 2.7 or 3.6
- Numpy >= 1.11
- Python packages from PyPI listed in the requirements.txt file
Most of these requirements are available from the Linux distribution.
For an easily maintainable Python environment, [Miniconda](https://conda.io/miniconda.html) is recommended.
The Python environment distributed with the OS often contains outdated packages,
and it's difficult to switch between different Python versions.
Most of these requirements are available from the Linux distribution, or from PyPI (pip install), respectively.
If there are any issues with the packages installed by the distribution, try the ones from PyPI
(e.g. there is currently a bug in the Debian mpi4py package).
The F2C source code is contained in the repository for machines which don't have it installed.
On the PSI cluster machines, the environment must be set using the module system and conda (on Ra).
Details are explained in the PEARL Wiki.
\subsubsection sec_install_ubuntu Installation on Ubuntu 16.04
PMSCO runs under Python 2.7 or Python 3.6 or higher.
Since Python 2 is being deprecated, the code has been ported to Python 3.6.
Compatibility with Python 2.7 is maintained by using
the [future package](http://python-future.org/compatible_idioms.html).
New code should be written according to their guidelines.
The following instructions install the necessary dependencies on Ubuntu (or Lubuntu 16.04):
\subsection sec_install_instructions Instructions
\subsubsection sec_install_ubuntu Installation on Ubuntu
The following instructions install the necessary dependencies on Ubuntu, Debian or related distributions.
The Python environment is provided by [Miniconda](https://conda.io/miniconda.html).
@code{.sh}
sudo apt-get update
sudo apt update
sudo apt-get install \
sudo apt install \
binutils \
build-essential \
doxygen \
@ -67,38 +78,114 @@ gcc \
gfortran \
git \
graphviz \
ipython \
libblas-dev \
liblapack-dev \
libopenmpi-dev \
make \
nano \
openmpi-bin \
openmpi-common \
python-all \
python-mock \
python-nose \
python-numpy \
python-pip \
python-scipy \
python2.7-dev \
swig
sqlite3 \
swig \
wget
@endcode
sudo pip install --system mpi4py periodictable
On systems where the link to libblas is missing (see @ref sec_compile below),
the following lines are necessary.
@code{.sh}
cd /usr/lib
sudo ln -s /usr/lib/libblas/libblas.so.3 libblas.so
@endcode
The following instructions install the PyCharm IDE and a few other useful utilities:
Install Miniconda according to their [instructions](https://conda.io/docs/user-guide/install/index.html),
then configure the Python environment:
@code{.sh}
sudo sh -c 'echo "deb http://archive.getdeb.net/ubuntu xenial-getdeb apps" >> /etc/apt/sources.list.d/getdeb.list'
wget -q -O - http://archive.getdeb.net/getdeb-archive.key | sudo apt-key add -
sudo apt-get update
sudo apt-get install \
conda create -q --yes -n pmsco python=2.7
source activate pmsco
conda install -q --yes -n pmsco \
pip \
numpy \
scipy \
ipython \
mpi4py \
matplotlib \
nose \
mock \
future \
statsmodels
pip install periodictable attrdict fasteners
@endcode
\subsubsection sec_install_singularity Installation in Singularity container
A [Singularity](https://www.sylabs.io/guides/2.5/user-guide/index.html) container
contains all OS and Python dependencies for running PMSCO.
Besides the Singularity executable, nothing else needs to be installed in the host system.
This may be the fastest way to get PMSCO running.
For installation of Singularity,
see their [user guide](https://www.sylabs.io/guides/2.5/user-guide/installation.html).
On newer Linux systems (e.g. Ubuntu 18.04), Singularity is available from the package manager.
Installation in a virtual machine on Windows or Mac is straightforward
thanks to the [Vagrant system](https://www.vagrantup.com/).
After installing Singularity,
check out PMSCO as explained in the @ref sec_compile section:
@code{.sh}
cd ~
mkdir containers
git clone git@git.psi.ch:pearl/pmsco.git pmsco
cd pmsco
git checkout master
git checkout -b my_branch
@endcode
Then, either copy a pre-built container into `~/containers`,
or build one from a script provided by the PMSCO repository:
@code{.sh}
cd ~/containers
sudo singularity build pmsco.simg ~/containers/pmsco/extras/singularity/singularity_python2
@endcode
To work with PMSCO, start an interactive shell in the container and switch to the pmsco environment.
Note that the PMSCO code is outside the container and can be edited with the usual tools.
@code{.sh}
cd ~/containers
singularity shell pmsco.simg
source activate pmsco
cd ~/containers/pmsco
make all
nosetests -w tests/
@endcode
Or call PMSCO from outside:
@code{.sh}
cd ~/containers
mkdir output
cd output
singularity run ../pmsco.simg python ~/containers/pmsco/pmsco path/to/your-project.py arg1 arg2 ...
@endcode
For parallel processing, prepend `mpirun -np X` to the singularity command as needed.
\subsubsection sec_install_extra Additional Applications
For working with the code and data, some other applications are recommended.
The PyCharm IDE can be installed from the Ubuntu software center.
The following commands install other useful helper applications:
@code{.sh}
sudo apt install \
avogadro \
gitg \
meld \
openjdk-9-jdk \
pycharm
meld
@endcode
To produce documentation in PDF format (not recommended on virtual machine), install LaTeX:
@ -124,15 +211,18 @@ Private key authentication is usually recommended except on shared computers.
Clone the code repository using one of these repository addresses and switch to the desired branch:
@code{.sh}
cd ~
git clone git@git.psi.ch:pearl/pmsco.git pmsco
cd pmsco
git checkout master
git checkout -b my_branch
@endcode
Compilation of the various modules is started by <code>make all</code>;
this step is necessary only once after installation.
Compile the code and run the unit tests to check that everything works:
@code{.sh}
make all
nosetests -w tests/
@endcode
If the compilation of _loess.so fails due to a missing BLAS library,
try to set a link to the BLAS library (the actual file names may vary depending on the distribution or version).
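The link command is the same as in the Ubuntu installation section above:
@code{.sh}
cd /usr/lib
sudo ln -s /usr/lib/libblas/libblas.so.3 libblas.so
@endcode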
Re-check from time to time by re-running the unit tests:
@code{.sh}
cd ~/pmsco
nosetests -w tests/
@endcode
Run the twoatom project to check the compilation of the calculation programs.
@code{.sh}
mkdir work
cd work
mkdir twoatom
cd twoatom/
nice python ~/pmsco/pmsco ~/pmsco/projects/twoatom/twoatom.py -s ea -o twoatom_energy_alpha -m single
@endcode
Runtime warnings may appear because the twoatom project does not contain experimental data.
To learn more about running PMSCO, see @ref pag_run.
*/

PMSCO is written in Python 2.7.
EDAC is written in C++, MSC in Fortran.
PMSCO interacts with the calculation programs through Python wrappers for C++ or Fortran.
The MSC and EDAC source code is contained in the same software repository.
\section sec_intro_highlights Highlights
\section sec_intro_start Getting Started
- @ref pag_concepts
- @ref pag_concepts_tasks
- @ref pag_concepts_emitter
- @ref pag_install
- @ref pag_run
- @ref pag_command
\section sec_license License Information
An open distribution of PMSCO is available under the [Apache License, Version 2.0](http://www.apache.org/licenses/LICENSE-2.0) at <https://gitlab.psi.ch/pearl-public/pmsco>.
- Please read and respect the respective license agreements.
- Please acknowledge the use of the code.
- Please share your development of the code with the original author.
Due to different copyright terms, the MSC and EDAC programs are not contained in the public software repository.
These programs may not be used without an explicit agreement by the respective original authors.
\author Matthias Muntwiler, <mailto:matthias.muntwiler@psi.ch>
\version This documentation is compiled from version $(REVISION).
\copyright 2015-2018 by [Paul Scherrer Institut](http://www.psi.ch)
\copyright Licensed under the [Apache License, Version 2.0](http://www.apache.org/licenses/LICENSE-2.0)
*/

/*! @page pag_opt Model optimizers
\section sec_opt Model optimizers
\subsection sec_opt_swarm Particle swarm
The particle swarm algorithm is adapted from
D. A. Duncan et al., Surface Science 606, 278 (2012).
The general parameters of the particle swarm algorithm are specified in the @ref Project.optimizer_params dictionary.
Some of them can be changed on the command line.
| Parameter | Command line | Range | Description |
| --- | --- | --- | --- |
| pop_size | --pop-size | &ge; 1 | Number of models (particles) per generation, calculated in parallel. |
| position_constrain_mode | | default bounce | Resolution of domain limit violations. |
| seed_file | --seed-file | a file path, default none | |
| seed_limit | --seed-limit | 0..pop_size | |
| rfac_limit | | 0..1, default 0.8 | Accept only seed values that have a lower R-factor. |
| recalc_seed | | True or False, default True | |
The domain parameters have the following meanings:
| Parameter | Description |
| --- | --- |
| start | Seed model. The start values are copied into particle 0 of the initial population. |
| min | Lower limit of the parameter range. |
| max | Upper limit of the parameter range. |
| step | Not used. |
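The following is a minimal sketch of a user project configuring the particle swarm.
The attribute and method names follow the tables above and the class diagrams in this documentation;
the module path, the mode string and the parameter names are assumptions that should be checked against the code.
@code{.py}
# minimal sketch, not a verified example: module path, mode string and values are assumptions
from pmsco.project import Project, Domain

class MyProject(Project):
    def __init__(self):
        super(MyProject, self).__init__()
        self.mode = "swarm"                              # assumed mode string for the particle swarm
        self.optimizer_params['pop_size'] = 20           # particles per generation
        self.optimizer_params['seed_file'] = "seed.dat"  # optional: seed from a previous run

    def create_domain(self):
        # create_cluster() and create_params() are omitted in this sketch
        dom = Domain()
        # 'start' seeds particle 0 of the initial population; 'step' is not used by the swarm
        dom.add_param('dAS', start=2.05, min=1.90, max=2.20, step=0.05)
        return dom
@endcode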
\subsection sec_opt_genetic Genetic optimization
The genetic algorithm evolves a population of individuals
by a combination of inheritance, crossover, mutation
and selection in analogy to biological evolution.
The _genes_ are in this case the model parameters,
and selection occurs based on R-factor.
The genetic algorithm is adapted from
D. A. Duncan et al., Surface Science 606, 278 (2012).
It is implemented in the @ref pmsco.optimizers.genetic module.
The genetic optimization is helpful in the first stage of an optimization,
where a large parameter space needs to be sampled
and fast convergence on a small region of the parameter space is undesirable
because the algorithm might get caught in a local optimum.
On the other hand, convergence near the optimum is slower than in the particle swarm.
The genetic optimization should be run with a large number of iterations
rather than a large population size.
The general parameters of the genetic algorithm are specified in the @ref Project.optimizer_params dictionary.
Some of them can be changed on the command line.
| Parameter | Command line | Range | Description |
| --- | --- | --- | --- |
| pop_size | --pop-size | &ge; 1 | Number of models (individuals) per generation, calculated in parallel. |
| mating_factor | | 1..pop_size, default 4 | |
| strong_mutation_probability | | 0..1, default 0.01 | Probability that a parameter undergoes a strong mutation. |
| weak_mutation_probability | | 0..1, default 1 | Probability that a parameter undergoes a weak mutation. This parameter should be left at 1. Lower values tend to produce discrete parameter values. Weak mutations can be tuned by the step domain parameter. |
| position_constrain_mode | | default random | Resolution of domain limit violations. |
| seed_file | --seed-file | a file path, default none | |
| seed_limit | --seed-limit | 0..pop_size | |
| rfac_limit | | 0..1, default 0.8 | Accept only seed values that have a lower R-factor. |
| recalc_seed | | True or False, default True | |
The domain parameters have the following meanings:
| Parameter | Description |
| --- | --- |
| start | Seed model. The start values are copied into particle 0 of the initial population. |
| min | Lower limit of the parameter range. |
| max | Upper limit of the parameter range. |
| step | Standard deviation of the Gaussian distribution of weak mutations. The step should not be much smaller than the parameter range divided by the population size, and not greater than one third of the parameter range. |
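As a hedged sketch (names as in the tables above; the exact interface should be checked against the @ref pmsco.optimizers.genetic module),
a user project might tune the genetic algorithm like this:
@code{.py}
# sketch only: attribute names follow the tables above, values are illustrative
def configure_genetic(project, domain):
    """Set typical genetic-algorithm parameters on a Project instance and its Domain."""
    project.mode = "genetic"                                        # assumed mode string
    project.optimizer_params['pop_size'] = 15
    project.optimizer_params['strong_mutation_probability'] = 0.01  # rare re-randomization of a parameter
    project.optimizer_params['weak_mutation_probability'] = 1.0     # keep at 1; tune the width via 'step'
    # 'step' is the standard deviation of weak mutations:
    # not much smaller than (max - min) / pop_size and at most (max - min) / 3
    domain.add_param('dAS', start=2.05, min=1.90, max=2.20, step=0.03)
@endcode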
\subsection sec_opt_grid Grid search
The grid search algorithm samples the parameter space at equidistant steps.
The order of calculations is randomized so that early results represent various parts of the parameter space.
| Parameter | Description |
| --- | --- |
| start | Values of fixed parameters. |
| min | Lower limit of the parameter range. |
| max | Upper limit of the parameter range. If abs(max - min) < step/2, the parameter is kept constant. |
| step | Step size (distance between two grid points). If step <= 0, the parameter is kept constant. |
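For illustration (assuming the Domain.add_param interface shown in the class diagrams below, with placeholder parameter names),
a grid over one parameter with a second parameter held fixed could be defined as:
@code{.py}
# illustrative sketch: parameter names and values are placeholders
dom = Domain()
dom.add_param('dAS', start=2.00, min=1.80, max=2.20, step=0.05)  # 9 grid points from 1.80 to 2.20
dom.add_param('V0', start=10.0, min=10.0, max=10.0, step=0.0)    # step <= 0: kept constant at 'start'
@endcode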
\subsection sec_opt_gradient Gradient search
Currently not implemented.
\subsection sec_opt_table Table scan
The table scan calculates models from an explicit table of model parameters.
It can be used to recalculate models from a previous optimization run on other experimental data,
as an interface to external optimizers,
or as a simple input of manually edited model parameters.
The table can be stored in an external file that is specified on the command line,
or supplied in one of several forms by the custom project class.
The table can be left unchanged during the calculations,
or new models can be added on the go.
@attention It is not easily possible to know when and which models have been read from the table file. If you modify the table file while the calculations are running, observe the following hints (a minimal sketch of a safe append follows the list):
1. The file on disk must not be locked for more than a second. Do not keep the file open unnecessarily.
2. _Append_ new models to the end of the table rather than overwriting previous ones. Otherwise, some models may be lost before they have been calculated.
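A minimal sketch of a safe append; the file name and column layout are hypothetical and must match whatever your table reader expects:
@code{.py}
# hedged sketch: append one model row and release the file immediately
row = [21, 2.05, 10.0]                      # e.g. a model index and parameter values (placeholders)
with open('models.tab', 'a') as f:          # append only, do not overwrite earlier rows
    f.write('\t'.join(str(v) for v in row) + '\n')
# the file is closed right away, so it is not locked for more than a moment
@endcode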
The general parameters of the table scan are specified in the @ref Project.optimizer_params dictionary.
Some of them can be changed on the command line or in the project class (depending on how the project class is implemented).
| Parameter | Command line | Range | Description |
| --- | --- | --- | --- |
| pop_size | --pop-size | &ge; 1 | Number of models in a generation (calculated in parallel). In table mode, this parameter is not so important and can be left at the default. It has nothing to do with table size. |
| table_file | --table-file | a file path, default none | |
The domain parameters have the following meanings.
Models that violate the parameter range are not calculated.
| Parameter | Description |
| --- | --- |
| start | Not used. |
| min | Lower limit of the parameter range. |
| max | Upper limit of the parameter range. |
| step | Not used. |
\subsection sec_opt_single Single model
The single model optimizer calculates the model defined by domain.start.
| Parameter | Description |
| --- | --- |
| start | Values of model parameters. |
| min | Not used. |
| max | Not used. |
| step | Not used. |
*/

@startuml
class CalculationTask {
id : CalcID
parent : CalcID
model : dict
file_root : str
file_ext : str
result_filename : str
modf_filename : str
result_valid : bool
time : datetime.timedelta
files : dict
region : dict
__init__()
__eq__()
__hash__()
copy()
change_id()
format_filename()
get_mpi_message()
set_mpi_message()
add_task_file()
rename_task_file()
remove_task_file()
}
class CalcID {
model
scan
sym
emit
region
}
CalculationTask *-- CalcID
@enduml

@startuml
object Root {
id = -1, -1, -1, -1, -1
parent = -1, -1, -1, -1, -1
model = {}
}
Root o.. Model1
Root o.. Model2
object Model1 {
id = 1, -1, -1, -1, -1
parent = -1, -1, -1, -1, -1
model = {'d': 5}
}
object Model2 {
id = 2, -1, -1, -1, -1
parent = -1, -1, -1, -1, -1
model = {'d': 7}
}
Model1 o.. Scan11
Model1 o.. Scan12
Model2 o.. Scan21
object Scan11 {
id = 1, 1, -1, -1, -1
parent = 1, -1, -1, -1, -1
model = {'d': 5}
}
object Scan12 {
id = 1, 2, -1, -1, -1
parent = 1, -1, -1, -1, -1
model = {'d': 5}
}
object Scan21 {
id = 2, 1, -1, -1, -1
parent = 2, -1, -1, -1, -1
model = {'d': 7}
}
Scan11 o.. Sym111
object Sym111 {
id = 1, 1, 1, -1, -1
parent = 1, 1, -1, -1, -1
model = {'d': 5}
}
Sym111 o.. Emitter1111
object Emitter1111 {
id = 1, 1, 1, 1, -1
parent = 1, 1, 1, -1, -1
model = {'d': 5}
}
Emitter1111 o.. Region11111
object Region11111 {
id = 1, 1, 1, 1, 1
parent = 1, 1, 1, 1, -1
model = {'d': 5}
}
@enduml
@startuml
object "Root: CalculationTask" as Root {
}
note right: all attributes undefined
object "Model: CalculationTask" as Model {
model
}
note right: model is defined\nother attributes undefined
object ModelHandler
object "Scan: CalculationTask" as Scan {
model
scan
}
object ScanHandler
object "Sym: CalculationTask" as Sym {
model
scan
symmetry
}
object "SymmetryHandler" as SymHandler
object "Emitter: CalculationTask" as Emitter {
model
scan
symmetry
emitter
}
object EmitterHandler
object "Region: CalculationTask" as Region {
model
scan
symmetry
emitter
region
}
note right: all attributes well-defined
object RegionHandler
Root "1" o.. "1..*" Model
Model "1" o.. "1..*" Scan
Scan "1" o.. "1..*" Sym
Sym "1" o.. "1..*" Emitter
Emitter "1" o.. "1..*" Region
(Root, Model) .. ModelHandler
(Model, Scan) .. ScanHandler
(Scan, Sym) .. SymHandler
(Sym, Emitter) .. EmitterHandler
(Emitter, Region) .. RegionHandler
@enduml

@startuml
class CalculationTask {
model
scan
symmetry
emitter
region
..
files
}
class Model {
index
..
dlat
dAS
dS1S2
V0
Zsurf
Texp
rmax
}
class Scan {
index
..
filename
mode
initial_state
energies
thetas
phis
alphas
}
class Symmetry {
index
..
rotation
registry
}
class Emitter {
index
}
class Region {
index
..
range
}
CalculationTask *-- Model
CalculationTask *-- Scan
CalculationTask *-- Symmetry
CalculationTask *-- Emitter
CalculationTask *-- Region
class Project {
scans
symmetries
model_handler
cluster_generator
}
class ClusterGenerator {
count_emitters()
create_cluster()
}
class ModelHandler {
create_tasks()
add_result()
}
Model ..> ModelHandler
Scan ..> Project
Symmetry ..> Project
Emitter ..> ClusterGenerator
Region ..> Project
Project *-left- ModelHandler
Project *- ClusterGenerator
hide empty members
@enduml

@startuml
package pmsco {
class Project {
cluster_generator
export_cluster()
}
abstract class ClusterGenerator {
project
{abstract} count_emitters()
{abstract} create_cluster()
}
class LegacyClusterGenerator {
project
count_emitters()
create_cluster()
}
}
package "user project" {
class UserClusterGenerator {
project
count_emitters()
create_cluster()
}
note bottom : for complex cluster
class UserProject {
count_emitters()
create_cluster()
}
note bottom : for simple cluster
}
Project <|-- UserProject
ClusterGenerator <|-- LegacyClusterGenerator
ClusterGenerator <|-- UserClusterGenerator
Project *-- ClusterGenerator
UserProject .> LegacyClusterGenerator
UserProject .> UserClusterGenerator
@enduml

@startuml
class Project << (T,orchid) >> {
id
..
..
name
code
}
class Scan << (T,orchid) >> {
id
..
job_id
..
name
}
class Job << (T,orchid) >> {
id
..
project_id
..
name
mode
machine
git_hash
datetime
description
}
class Model << (T,orchid) >> {
id
..
job_id
..
model
gen
particle
}
class Result << (T,orchid) >> {
id
..
model_id
..
scan
sym
emit
region
rfac
}
class Param << (T,orchid) >> {
id
..
..
key
}
class ParamValue << (T,orchid) >> {
id
..
param_id
model_id
..
value
}
Project "1" *-- "*" Job
Job "1" *-- "*" Model
Job "1" *-- "*" Scan
Param "1" *-- "*" ParamValue
Model "1" *-- "*" ParamValue
Model "1" *-- "*" Result
hide empty members
@enduml

@startuml
start
repeat
:define model tasks;
:gather model results;
repeat while
stop
@enduml
@startuml
start
repeat
partition "generate tasks" {
:define model tasks;
:define scan tasks;
:define symmetry tasks;
:define emitter tasks;
:define region tasks;
}
fork
:calculate task 1;
fork again
:calculate task 2;
fork again
:calculate task N;
end fork
partition "collect results" {
:gather region results;
:gather emitter results;
:gather symmetry results;
:gather scan results;
:gather model results;
}
repeat while
stop
@enduml

@startuml{master-slave-messages.png}
== task execution ==
loop calculation tasks
hnote over Master : define task
Master -> Slave: TAG_NEW_TASK
activate Slave
hnote over Slave : calculation
alt successful
Slave --> Master: TAG_NEW_RESULT
else calculation failed
Slave --> Master: TAG_INVALID_RESULT
else critical error
Slave --> Master: TAG_ERROR_ABORTING
end
deactivate Slave
hnote over Master : collect results
end
...
== termination ==
Master -> Slave: TAG_FINISH
destroy Slave
@enduml

@startuml
package pmsco {
abstract class Project {
mode
code
scans
symmetries
{abstract} create_cluster()
{abstract} create_params()
{abstract} create_domain()
}
}
package projects {
class UserProject {
__init__()
create_cluster()
create_params()
create_domain()
}
}
Project <|-- UserProject
hide empty members
@enduml

@startuml
participant rank0 as "rank 0 (master)"
participant rank1 as "rank 1 (slave)"
participant rank2 as "rank 2 (slave)"
participant rankN as "rank N (slave)"
== initialization ==
rank0 ->> rank0
activate rank0
rnote over rank0: initialize project
== task loop ==
rnote over rank0: specify tasks
rank0 ->> rank1: task 1
activate rank1
rnote over rank1: execute task 1
rank0 ->> rank2: task 2
activate rank2
rnote over rank2: execute task 2
rank0 ->> rankN: task N
deactivate rank0
activate rankN
rnote over rankN: execute task N
rank0 <<-- rank1: result 1
deactivate rank1
rnote over rank0: process results\nspecify tasks
activate rank0
rank0 ->> rank1: task N+1
deactivate rank0
activate rank1
rnote over rank1: execute task N+1
rank0 <<-- rank2: result 2
deactivate rank2
activate rank0
rank0 ->> rank2: task N+2
deactivate rank0
activate rank2
rnote over rank2: execute task N+2
rank0 <<-- rankN: result N
deactivate rankN
activate rank0
rank0 ->> rankN: task 2N
deactivate rank0
activate rankN
rnote over rankN: execute task 2N
rank0 <<-- rank1: result N+1
deactivate rank1
rank0 <<-- rank2: result N+2
deactivate rank2
rank0 <<-- rankN: result 2N
deactivate rankN
rnote over rank0: process results
activate rank0
hnote over rank0: calculations complete
== termination ==
rnote over rank0: report results
rank0 ->> rank1: finish
destroy rank1
rank0 ->> rank2: finish
destroy rank2
rank0 ->> rankN: finish
destroy rankN
deactivate rank0
@enduml

@startuml
abstract class Project {
mode : str = "single"
code : str = "edac"
scans : Scan [1..*]
symmetries : dict [1..*]
cluster_generator : ClusterGenerator
handler_classes
files : FileTracker
{abstract} create_cluster() : Cluster
{abstract} create_params() : Params
{abstract} create_domain() : Domain
}
class Scan {
filename
raw_data
dtype
modulation
mode
emitter
initial_state
energies
thetas
phis
alphas
set_scan()
}
class Domain {
start : dict
min : dict
max : dict
step : dict
add_param(name, start, min, max, step)
get_param(name)
}
class Params {
title
comment
cluster_file
output_file
scan_file
initial_state
polarization
angular_resolution
z_surface
inner_potential
work_function
polar_incidence_angle
azimuthal_incidence_angle
experiment_temperature
}
Project "1" *-- "1..*" Scan
@enduml

@startuml
:model task|
fork
partition "scan 0" {
:define scan;
:scan 0 task|
detach
:scan 0 result|
}
fork again
partition "scan 1" {
:define scan;
:scan 1 task|
detach
:scan 1 result|
}
fork again
partition "scan N" {
:define scan;
:scan N task|
detach
:scan N result|
}
end fork
:model result|
@enduml

@startuml
|user|
start
:setup;
|pmsco|
:initialize;
:import experimental data;
repeat
:define task;
|calculator|
:calculate\ntask;
|pmsco|
:evaluate results;
repeat while
-> [finished];
:report results;
stop
@enduml
@startuml
|pmsco|
start
:define task (model, scan, symmetry, emitter, region);
|project|
:create cluster;
:create parameters;
|calculator|
:scattering calculation;
|pmsco|
:combine results;
|project|
:calculate modulation function;
:calculate R-factor;
stop
@enduml

@startuml
start
:initialize;
:import experimental data|
repeat
:define tasks;
fork
:calculate\ntask 1;
fork again
:calculate\ntask N;
end fork
:evaluate results;
repeat while
-> [finished];
:report results|
stop
@enduml

@startuml
skinparam componentStyle uml2
component "project" as project
component "PMSCO" as pmsco
component "scattering code\n(calculator)" as calculator
interface "command line" as cli
interface "input files" as input
interface "output files" as output
interface "experimental data" as data
interface "results" as results
data -> project
project ..> pmsco
pmsco ..> calculator
cli --> project
input -> calculator
calculator -> output
pmsco -> results
@enduml

@startuml
package pmsco {
abstract class Project {
mode
code
scans
symmetries
cluster_generator
handler_classes
__
{abstract} create_cluster()
{abstract} create_params()
{abstract} create_domain()
..
combine_scans()
combine_symmetries()
combine_emitters()
calc_modulation()
calc_rfactor()
}
abstract class ClusterGenerator {
{abstract} count_emitters()
{abstract} create_cluster()
}
}
package projects {
class UserProject {
scan_dict
__
setup()
..
create_params()
create_domain()
..
combine_symmetries()
}
class UserClusterGenerator {
count_emitters()
create_cluster()
}
}
Project <|-- UserProject
Project *-- ClusterGenerator
ClusterGenerator <|-- UserClusterGenerator
hide empty members
@enduml