update public distribution

based on internal repository c9a2ac8 2019-01-03 16:04:57 +0100
tagged rev-master-2.0.0
2019-01-31 15:45:02 +01:00
parent bbd16d0f94
commit acea809e4e
92 changed files with 165828 additions and 143181 deletions


@ -38,13 +38,13 @@ PROJECT_NAME = "PEARL MSCO"
# could be handy for archiving the generated documentation or if some version
# control system is used.
PROJECT_NUMBER =
PROJECT_NUMBER = $(REVISION)
# Using the PROJECT_BRIEF tag one can provide an optional one line description
# for a project that appears at the top of each page and should give viewer a
# quick idea about the purpose of the project. Keep the description short.
PROJECT_BRIEF = "PEARL multiple scattering calculations and optimizations"
PROJECT_BRIEF = "PEARL multiple scattering calculation and optimization"
# With the PROJECT_LOGO tag one can specify a logo or an icon that is included
# in the documentation. The maximum height of the logo should not exceed 55
@ -228,7 +228,7 @@ TAB_SIZE = 4
# "Side Effects:". You can put \n's in the value part of an alias to insert
# newlines.
ALIASES =
ALIASES = "raise=@exception"
# This tag can be used to specify a number of word-keyword mappings (TCL only).
# A mapping has the form "name=value". For example adding "class=itcl::class"
@ -597,19 +597,19 @@ STRICT_PROTO_MATCHING = NO
# list. This list is created by putting \todo commands in the documentation.
# The default value is: YES.
GENERATE_TODOLIST = YES
GENERATE_TODOLIST = NO
# The GENERATE_TESTLIST tag can be used to enable (YES) or disable (NO) the test
# list. This list is created by putting \test commands in the documentation.
# The default value is: YES.
GENERATE_TESTLIST = YES
GENERATE_TESTLIST = NO
# The GENERATE_BUGLIST tag can be used to enable (YES) or disable (NO) the bug
# list. This list is created by putting \bug commands in the documentation.
# The default value is: YES.
GENERATE_BUGLIST = YES
GENERATE_BUGLIST = NO
# The GENERATE_DEPRECATEDLIST tag can be used to enable (YES) or disable (NO)
# the deprecated list. This list is created by putting \deprecated commands in
@ -761,9 +761,12 @@ WARN_LOGFILE =
INPUT = \
src/introduction.dox \
src/concepts.dox \
src/concepts-tasks.dox \
src/concepts-emitter.dox \
src/installation.dox \
src/execution.dox \
src/commandline.dox \
src/optimizers.dox \
../pmsco \
../projects \
../tests
@ -859,7 +862,7 @@ EXAMPLE_RECURSIVE = NO
# that contain images that are to be included in the documentation (see the
# \image command).
IMAGE_PATH =
IMAGE_PATH = src/images
# The INPUT_FILTER tag can be used to specify a program that doxygen should
# invoke to filter for each input file. Doxygen will invoke the filter program
@ -876,7 +879,7 @@ IMAGE_PATH =
# code is scanned, but not when the output code is generated. If lines are added
# or removed, the anchors will not be placed correctly.
INPUT_FILTER = /usr/bin/doxypy
INPUT_FILTER =
# The FILTER_PATTERNS tag can be used to specify filters on a per file pattern
# basis. Doxygen will compare the file name with each pattern and apply the
@ -885,7 +888,7 @@ INPUT_FILTER = /usr/bin/doxypy
# filters are used. If the FILTER_PATTERNS tag is empty or if none of the
# patterns match the file name, INPUT_FILTER is applied.
FILTER_PATTERNS =
FILTER_PATTERNS = *.py=/usr/bin/doxypy
# If the FILTER_SOURCE_FILES tag is set to YES, the input filter (if set using
# INPUT_FILTER) will also be used to filter the input files that are used for
@ -2328,7 +2331,7 @@ DIAFILE_DIRS =
# generate a warning when it encounters a \startuml command in this case and
# will not generate output for the diagram.
PLANTUML_JAR_PATH =
PLANTUML_JAR_PATH = $(PLANTUML_JAR_PATH)
# When using plantuml, the specified paths are searched for files specified by
# the !include statement in a plantuml block.


@ -2,6 +2,11 @@ SHELL=/bin/sh
# makefile for PMSCO documentation
#
# requirements
#
# 1) doxygen
# 2) /usr/bin/doxypy
# 3) PLANTUML_JAR_PATH environment variable must point to plantUML jar.
.SUFFIXES:
.SUFFIXES: .c .cpp .cxx .exe .f .h .i .o .py .pyf .so .html
@ -11,6 +16,9 @@ DOX=doxygen
DOXOPTS=
LATEX_DIR=latex
REVISION=$(shell git describe --always --tags --dirty --long || echo "unknown, "`date +"%F %T %z"`)
export REVISION
all: docs
docs: doxygen pdf
@ -22,5 +30,6 @@ pdf: doxygen
-$(MAKE) -C $(LATEX_DIR)
clean:
-rm -rf latex/*
-rm -rf html/*
-rm -r latex/*
-rm -r html/*


@ -11,14 +11,19 @@ it is recommended to adhere to the standard syntax described below.
The basic command line is as follows:
@code{.sh}
[mpiexec -np NPROCESSES] python path-to-project.py [common args] [project args]
[mpiexec -np NPROCESSES] python path/to/pmsco path/to/project.py [common args] [project args]
@endcode
Include the first portion between square brackets if you want to run parallel processes.
Specify the number of processes as the @c -np option.
@c path-to-project.py should be the path and name to your project module.
@c path/to/pmsco is the directory where <code>__main__.py</code> is located.
Do not include the extension <code>.py</code> or a trailing slash.
@c path/to/project.py should be the path and name of your project module.
Common args and project args are described below.
Note: In contrast to earlier versions, the project module is not executed directly any more.
Rather, it is loaded by the main pmsco module as a 'plug-in'.
\subsection sec_common_args Common Arguments
@ -30,7 +35,7 @@ The following table is ordered by importance.
| Option | Values | Description |
| --- | --- | --- |
| -h , --help | | Display a command line summary and exit. |
| -m , --mode | single (default), grid, swarm | Operation mode. |
| -m , --mode | single (default), grid, swarm, genetic | Operation mode. |
| -d, --data-dir | file system path | Directory path for experimental data files (if required by project). Default: current working directory. |
| -o, --output-file | file system path | Base path and/or name for intermediate and output files. Default: pmsco_data |
| -t, --time-limit | decimal number | Wall time limit in hours. The optimizers try to finish before the limit. Default: 24.0. |
@ -39,6 +44,8 @@ The following table is ordered by importance.
| --log-file | file system path | Name of the main log file. Under MPI, the rank of the process is inserted before the extension. Default: output-file + log, or pmsco.log. |
| --log-disable | | Disable logging. By default, logging is on. |
| --pop-size | integer | Population size (number of particles) in swarm optimization mode. The default value is the greater of 4 or two times the number of calculation processes. |
| --seed-file | file system path | Name of the population seed file. Population data of previous optimizations can be used to seed a new optimization. The file must have the same structure as the .pop or .dat files. See @ref pmsco.project.Project.seed_file. |
| --table-file | file system path | Name of the model table file in table scan mode. |
| -c, --code | edac (default) | Scattering code. At the moment, only edac is supported. |
@ -49,13 +56,14 @@ Multiple names can be specified and must be separated by spaces.
| Category | Description | Default Action |
| --- | --- | --- |
| all | shortcut to include all categories | |
| input | raw input files for calculator, including cluster and phase files in custom format | delete |
| output | raw output files from calculator | delete |
| phase | phase files in portable format for report | delete |
| cluster | cluster files in portable XYZ format for report | keep |
| debug | debug files | delete |
| model | output files in ETPAI format: complete simulation (a_-1_-1_-1_-1) | keep |
| scan | output files in ETPAI format: scan (a_b_-1_-1_-1) | delete |
| scan | output files in ETPAI format: scan (a_b_-1_-1_-1) | keep |
| symmetry | output files in ETPAI format: symmetry (a_b_c_-1_-1) | delete |
| emitter | output files in ETPAI format: emitter (a_b_c_d_-1) | delete |
| region | output files in ETPAI format: region (a_b_c_d_e) | delete |
@ -84,36 +92,11 @@ This way, the file names and photoelectron parameters are versioned with the cod
whereas command line arguments may easily get forgotten in the records.
\subsection sec_project_example Example Argument Handling
\subsection sec_project_example Argument Handling
An example for handling the command line in a project module can be found in the twoatom.py demo project.
The following code snippet shows how the common and project arguments are separated and handled.
@code{.py}
def main():
# have the pmsco module parse the common arguments.
args, unknown_args = pmsco.pmsco.parse_cli()
# pass any arguments not handled by pmsco
# to the project-defined parse_project_args function.
# unknown_args can be passed to argparse.ArgumentParser.parse_args().
if unknown_args:
project_args = parse_project_args(unknown_args)
else:
project_args = None
# create the project object
project = create_project()
# apply the common arguments on the project
pmsco.pmsco.set_common_args(project, args)
# apply the specific arguments on the project
set_project_args(project, project_args)
# run the project
pmsco.pmsco.run_project(project)
@endcode
To handle command line arguments in a project module,
the module must define a <code>parse_project_args</code> and a <code>set_project_args</code> function.
An example can be found in the twoatom.py demo project.
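The following sketch shows the expected shape of these two functions.
The @c --distance argument is purely hypothetical; the two functions are called by the main pmsco module during start-up.

@code{.py}
import argparse

def parse_project_args(_args):
    # parse the arguments that the main pmsco module did not recognize
    parser = argparse.ArgumentParser()
    parser.add_argument('--distance', type=float, default=2.5)
    return parser.parse_args(_args)

def set_project_args(project, project_args):
    # apply the parsed values to the project object
    if project_args is not None:
        project.distance = project_args.distance
@endcode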
\section sec_slurm Slurm Job Submission
@ -122,23 +105,24 @@ The command line of the Slurm job submission script for the Ra cluster at PSI is
This script is specific to the configuration of the Ra cluster but may be adapted to other Slurm-based queues.
@code{.sh}
qpmsco.sh [NOSUB] JOBNAME NODES TASKS_PER_NODE WALLTIME:HOURS PROJECT MODE [ARGS [ARGS [...]]]
qpmsco.sh [NOSUB] DESTDIR JOBNAME NODES TASKS_PER_NODE WALLTIME:HOURS PROJECT MODE [ARGS [ARGS [...]]]
@endcode
Here, the first few arguments are positional and their order must be strictly adhered to.
After the positional arguments, optional arguments of the PMSCO project command line can be added in arbitrary order.
If you execute the script without arguments, it displays a short summary.
The job script is written to @c ~/jobs/\$JOBNAME.
The job script is written to @c $DESTDIR/$JOBNAME which is also the destination of calculation output.
| Argument | Values | Description |
| --- | --- | --- |
| NOSUB (optional) | NOSUB or omitted | If NOSUB is present as the first argument, create the job script but do not submit it to the queue. Otherwise, submit the job script. |
| DESTDIR | file system path | Destination directory. Must exist. A sub-directory $JOBNAME is created. |
| JOBNAME | text | Name of job. Use only alphanumeric characters, no spaces. |
| NODES | integer | Number of computing nodes. (1 node = 24 or 32 processors). Do not specify more than 2. |
| TASKS_PER_NODE | 1...24, or 32 | Number of processes per node. 24 or 32 for full-node allocation. 1...23 for shared node allocation. |
| WALLTIME:HOURS | integer | Requested wall time. 1...24 for day partition, 24...192 for week partition, 1...192 for shared partition. This value is also passed on to PMSCO as the @c --time-limit argument. |
| PROJECT | file system path | Python module (file path) that declares the project and starts the calculation. |
| MODE | single, swarm, grid | PMSCO operation mode. This value is passed on to PMSCO as the @c --mode argument. |
| MODE | single, swarm, grid, genetic | PMSCO operation mode. This value is passed on to PMSCO as the @c --mode argument. |
| ARGS (optional) | | Any further arguments are passed on verbatim to PMSCO. You don't need to specify the mode and time limit here. |
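For example, the following call creates, but does not submit, a job script for a 12-hour, single-node job with 24 tasks per node (the paths are illustrative):

@code{.sh}
qpmsco.sh NOSUB ~/jobs myjob01 1 24 12 ~/pmsco/projects/twoatom/twoatom.py single
@endcode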
*/


@ -0,0 +1,185 @@
/*! @page pag_concepts_emitter Emitter configurations
\section sec_emitters Emitter configurations
\subsection sec_emit_intro Introduction
Since emitters contribute incoherently to the diffraction pattern,
it should make no difference how the emitters are grouped and calculated.
This fact can be used to distribute a calculation over multiple parallel processes
if each process calculates the diffraction pattern coming from one particular emitter atom.
In effect, some calculation codes are implemented for a single emitter per calculation.
With PMSCO, it is easy to distribute the emitters over parallel processes.
The project just declares the number of emitters and returns one specific cluster per emitter.
In the simplest case, this means that the emitter attribute of the cluster atoms is set differently,
while the atomic coordinates are the same for all clusters generated.
PMSCO takes care of dispatching the clusters to multiple calculation processes
depending on the number of allocated MPI processes
as well as summing up the resulting diffraction patterns.
In addition, the emitter framework supports tailoring clusters to a specific emitter configuration.
Suppose that the unit cell contains a large number of inequivalent emitters.
If all emitters had to be included in a single calculation,
the cluster would grow very large and the calculation would include many long scattering paths
that effectively did not contribute intensity to the final result.
Splitting a large cluster into small ones built locally around one emitter
can provide a significant performance gain in complex systems.
Note that the emitter framework does not require that an emitter _configuration_ contains only one emitter _atom_.
It is up to the project to define how many emitter configurations there are and what they encompass.
Normally, however, grouping multiple emitter atoms into one configuration should not be necessary.
To avoid confusion, it is recommended to declare exactly one emitter atom per configuration.
\subsection sec_emit_implement Implementation
There are several implementation routes with varying complexity.
Which route to take can depend on the complexity of the system and/or the programming skills of the user.
The following class diagram illustrates the classes and packages involved in cluster generation.
@startuml "class diagram for cluster generation"
package pmsco {
class Project {
cluster_generator
export_cluster()
}
abstract class ClusterGenerator {
project
{abstract} count_emitters()
{abstract} create_cluster()
}
class LegacyClusterGenerator {
project
count_emitters()
create_cluster()
}
}
package "user project" {
class UserClusterGenerator {
project
count_emitters()
create_cluster()
}
note bottom : for complex cluster
class UserProject {
count_emitters()
create_cluster()
}
note bottom : for simple cluster
}
Project <|-- UserProject
ClusterGenerator <|-- LegacyClusterGenerator
ClusterGenerator <|-- UserClusterGenerator
Project *-- ClusterGenerator
UserProject .> LegacyClusterGenerator
UserProject .> UserClusterGenerator
@enduml
In general, the cluster is generated by calls to the project's cluster_generator object.
This can be either a custom generator class derived from pmsco.cluster.ClusterGenerator
or the default pmsco.cluster.LegacyClusterGenerator which calls the UserProject.
For simple clusters, it may be sufficient to implement the cluster directly in the user project class
(UserProject in the diagram).
For more complex systems, it is recommended to implement a custom cluster generator class
(UserClusterGenerator).
\subsubsection sec_emit_implement_legacy Static cluster implemented in project methods
This is the simplest route, as it requires implementing only one or two methods of the user project class.
It can be used for single-emitter and multi-emitter problems.
This implementation is active while a pmsco.cluster.LegacyClusterGenerator
is assigned to the project's cluster_generator attribute.
1. Implement a count_emitters method in your project class
if the project uses more than one emitter configuration.
It must have the same method contract as pmsco.cluster.ClusterGenerator.count_emitters.
Specifically, it must return the number of emitter configurations of a given model, scan and symmetry.
If there is only one configuration, the method does not need to be implemented.
2. Implement a create_cluster method in your project class.
It must have the same method contract as pmsco.cluster.ClusterGenerator.create_cluster.
Specifically, it must return a cluster.Cluster object for the given model, scan, symmetry and emitter configuration.
The emitter atoms must be marked according to the emitter configuration specified by the index argument.
Note that, depending on the index.emit argument, either all emitter atoms
or only those of the corresponding emitter configuration must be marked.
3. (Optionally) override the pmsco.project.Project.combine_emitters method
if the emitters should be added with non-uniform weights.
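The following sketch illustrates steps 1 and 2 in the user project class.
The argument names follow the contract described above; the cluster-building details and the number of configurations are placeholders.

@code{.py}
import pmsco.cluster
import pmsco.project

class UserProject(pmsco.project.Project):
    def count_emitters(self, model, index):
        # number of emitter configurations of the given model, scan and symmetry
        return 2

    def create_cluster(self, model, index):
        clu = pmsco.cluster.Cluster()
        # ... add atoms according to the model parameters ...
        # ... mark the emitter atom(s) of the configuration selected by index.emit ...
        return clu
@endcode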
Although it's possible to produce emitter-dependent clusters using this approach,
this is usually not recommended.
Rather, the generator approach described below should be followed in this case.
\subsubsection sec_emit_implement_generator Static cluster implemented by generator class
The preferred way of creating clusters is to implement a _generator_ class
because it is the most scalable way from simple to complex systems.
In addition, one cluster generator class can be quickly exchanged for another
if there are multiple possibilities.
1. Implement a cluster generator class which inherits from pmsco.cluster.ClusterGenerator
in your project module.
2. Implement the create_cluster and count_emitters methods of the generator.
The method contracts are the same as the ones described in the previous paragraph,
just in the context of a separate class.
3. Initialize an instance of the generator and assign it to the project.cluster_generator attribute
in the initialization of your project.
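A minimal skeleton of this route is sketched below.
The generator's constructor is assumed to take the project as its argument (cf. the project attribute in the class diagram above); the method bodies are placeholders.

@code{.py}
import pmsco.cluster
import pmsco.project

class UserClusterGenerator(pmsco.cluster.ClusterGenerator):
    def count_emitters(self, model, index):
        # same contract as described in the previous paragraph
        return 1

    def create_cluster(self, model, index):
        clu = pmsco.cluster.Cluster()
        # ... build the cluster for emitter configuration index.emit ...
        return clu

class UserProject(pmsco.project.Project):
    def __init__(self):
        super(UserProject, self).__init__()
        # step 3: attach the generator instance to the project
        self.cluster_generator = UserClusterGenerator(self)
@endcode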
\subsubsection sec_emit_implement_local Local clusters implemented by generator class
The basic method contract outlined in the previous paragraph is equally applicable to the case
where a local cluster is generated for each emitter configuration.
Again, the generator class with the two methods (count_emitters and create_cluster) is the minimum requirement.
However, for ease of code maintenance and/or for improved performance of large clusters,
some internal structure may be helpful.
Suppose that the system consists of a large supercell containing many emitters
and that a small cluster shall be built for each emitter configuration.
During the calculations, the generator will receive several calls to the count_emitters and create_cluster methods.
Every time the model and index are the same, the functions must return the same result.
Thus, most importantly, the implementation must make sure that the results are fully deterministic.
Second, depending on the complexity, it could be more efficient to cache a cluster for later use.
One way to reduce the complexity is to introduce a _master cluster_
from which the emitter configurations and individual clusters are derived.
1. Implement a master_cluster method with the same arguments and result types as create_cluster.
The method returns a full cluster of the supercell and its neighbouring cells.
All inequivalent emitters are marked (which determines the number of emitter configurations).
2. Decorate the master_cluster with pmsco.dispatch.CachedCalculationMethod.
This pre-defined decorator transparently caches the cluster
so that subsequent calls with the same arguments do not re-create the cluster but return the cached one.
3. The count_emitters method can simply return the emitter count of the master cluster.
4. The create_cluster method calls master_cluster() and extracts the region
corresponding to the requested emitter configuration.
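The pattern is sketched below.
The decorator and the method names follow the description above; the code that builds the master cluster, counts the marked emitters and extracts the local cluster is project-specific and only indicated by placeholder comments.

@code{.py}
import pmsco.cluster
from pmsco.dispatch import CachedCalculationMethod

class SupercellClusterGenerator(pmsco.cluster.ClusterGenerator):
    @CachedCalculationMethod
    def master_cluster(self, model, index):
        clu = pmsco.cluster.Cluster()
        # ... build the supercell and its neighbouring cells,
        #     then mark all inequivalent emitters ...
        return clu

    def count_emitters(self, model, index):
        master = self.master_cluster(model, index)
        # ... return the number of marked emitters of the master cluster ...
        return 1

    def create_cluster(self, model, index):
        master = self.master_cluster(model, index)
        clu = pmsco.cluster.Cluster()
        # ... copy the atoms around the emitter of configuration index.emit
        #     from master into clu ...
        return clu
@endcode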
\subsection sec_emit_report Reporting
The pmsco.project.Project class implements a method that saves a cluster to two XYZ files,
one containing the coordinates of all atoms
and one containing only the coordinates of the emitters.
The method is called for each cluster that is passed to the calculator, i.e., each emitter index.
You may override the method in your project to alter the reporting.
*/


@ -0,0 +1,3 @@
/*! @page pag_concepts_model Model
*/


@ -0,0 +1,3 @@
/*! @page pag_concepts_region Region
*/


@ -0,0 +1,31 @@
/*! @page pag_concepts_scan Scans
\section sec_scanning Scanning
PMSCO with EDAC currently supports the following scan axes.
- kinetic energy E
- polar angle theta T
- azimuthal angle phi P
- analyser angle alpha A
The following combinations of these scan axes are allowed (see pmsco.data.SCANTYPES).
- E
- E-T
- E-A
- T-P (hemispherical or hologram scan)
@attention The T and A axes cannot be combined.
If a scan of one of them is specified, the other is assumed to be fixed at zero!
This assumption may change in the future,
so it is best to explicitly set the fixed angle to zero in the scan file.
@remark According to the measurement geometry at PEARL,
alpha scans are implemented in EDAC as theta scans at phi = 90 in fixed cluster mode.
The switch to fixed cluster mode is made internally by PMSCO;
no change of angles or other parameters is necessary in the scan or project files
besides filling the alpha column instead of the theta column.
*/


@ -0,0 +1,32 @@
/*! @page pag_concepts_symmetry Symmetry
\section sec_symmetry Symmetry and Domain Averaging
A _symmetry_ under PMSCO is a discrete variant of a set of calculation parameters (including the atomic cluster)
that is derived from the same set of model parameters
and that contributes incoherently to the measured diffraction pattern.
A symmetry may be represented by a special symmetry parameter which is not subject to optimization.
For instance, a real sample may have additional rotational domains that are not present in the cluster,
increasing the symmetry from three-fold to six-fold.
Or, an adsorbate may be present in a number of different lateral configurations on the substrate.
In the first case, it may be sufficient to fold calculated data in the proper way to generate the same symmetry as in the measurement.
In the latter case, it may be necessary to execute a scattering calculation for each possible orientation or a representative number of possible orientations.
PMSCO provides the basic framework to spawn multiple calculations according to the number of symmetries (cf. \ref sec_tasks).
The actual data reduction from multiple symmetries to one measurement needs to be implemented on the project level.
This section explains the necessary steps.
1. Your project needs to populate the pmsco.project.Project.symmetries list.
For each symmetry, add a dictionary of symmetry parameters, e.g. <code>{'angle_azi': 15.0}</code>.
There must be at least one symmetry in a project, otherwise no calculation is executed.
2. The project may apply the symmetry of a task to the cluster and parameter file if necessary.
The pmsco.project.Project.create_cluster and pmsco.project.Project.create_params methods receive the index of the particular symmetry in addition to the model parameters.
3. The project combines the results of the calculations for the various symmetries into one dataset that can be compared to the measurement.
The default method implemented in pmsco.project.Project just adds up all calculations with equal weight.
If you need more control, you need to override the pmsco.project.Project.combine_symmetries method and implement your own algorithm.
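As a minimal illustration of step 1, the symmetries list can be populated in the project's initialization (the parameter name is just an example):

@code{.py}
import pmsco.project

class UserProject(pmsco.project.Project):
    def __init__(self):
        super(UserProject, self).__init__()
        # one dictionary of symmetry parameters per domain
        self.symmetries.append({'angle_azi': 0.0})
        self.symmetries.append({'angle_azi': 15.0})
@endcode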
*/

docs/src/concepts-tasks.dox (new file)

@ -0,0 +1,306 @@
/*! @page pag_concepts_tasks Task concept
\section sec_tasks Calculation tasks
A _calculation task_ defines a concrete set of model parameters, atomic coordinates, emitter configuration,
experimental reference and meta-data (such as file names)
that completely defines how to produce the input data for the scattering program (the _calculator_).
For each task, the calculator is executed once and produces one result dataset.
In a typical optimization project, however, the calculator is executed multiple times,
for reasons mandated by the project as well as for efficient calculation in a multi-process environment:
1. The calculation must be repeated under variation of parameters.
A concrete set of parameters is called @ref sec_task_model.
2. The sample was measured multiple times or under different conditions (initial states, photon energy, emission angle).
Each contiguous measured dataset is called a @ref sec_task_scan.
3. The measurement averages over multiple inequivalent domains, cf. @ref sec_task_symmetry.
4. The measurement includes multiple geometrically inequivalent emitters, cf. @ref sec_task_emitter.
5. The calculation should be distributed over multiple processes that run in parallel to reduce the wall time, cf. @ref sec_task_region.
In PMSCO, these aspects are modelled as attributes of a calculation task
as shown schematically in the following diagram.
@startuml "attributes of a calculation task"
class CalculationTask {
model
scan
symmetry
emitter
region
..
files
}
class Model {
index
..
dlat
dAS
dS1S2
V0
Zsurf
Texp
rmax
}
class Scan {
index
..
filename
mode
initial_state
energies
thetas
phis
alphas
}
class Symmetry {
index
..
rotation
registry
}
class Emitter {
index
}
class Region {
index
..
range
}
CalculationTask *-- Model
CalculationTask *-- Scan
CalculationTask *-- Symmetry
CalculationTask *-- Emitter
CalculationTask *-- Region
class Project {
scans
symmetries
model_handler
cluster_generator
}
class ClusterGenerator {
count_emitters()
create_cluster()
}
class ModelHandler {
create_tasks()
add_result()
}
Model ..> ModelHandler
Scan ..> Project
Symmetry ..> Project
Emitter ..> ClusterGenerator
Region ..> Project
Project *-left- ModelHandler
Project *- ClusterGenerator
hide empty members
@enduml
Although the attributes may have quite different types (as detailed below),
each instance is also given a unique (per attribute) integer index,
where -1 means that the attribute is undefined.
The indices of the five attributes together (pmsco.dispatch.CalcID tuple)
serve internally to identify a task and the data belonging to it.
The identifier appears, for instance, in input and output file names.
Normally, data files are deleted after the calculation, and only a few top-level files are kept
(can be overridden at the command line or in the project code).
At the top level, only the model ID is set, the other ones are undefined (-1).
\subsection sec_task_model Model
The _model_ attribute is a dictionary of continuously variable parameters of the system such as lattice constants, relaxation constants, rotation angles, etc.
It may also define non-structural or non-physical parameters such as temperature, inner potential or cluster radius.
The dictionary contains key-value pairs where the keys are up to the user project (the figure shows some examples).
The values are floating-point numbers that are chosen by the model handler within the domain specified by the user project.
Models are generated by the chosen optimizer according to a particular algorithm or, in single mode, directly by the project.
Each specific instance of model parameters is given a unique index that identifies the related input and output files.
Model parameters are reported with the corresponding R-factors during the optimization process.
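For illustration, a concrete model is essentially a plain dictionary of floating-point values (the keys below are taken from the figure, the values are arbitrary examples):

@code{.py}
model = {'dlat': 3.92, 'dAS': 2.05, 'dS1S2': 2.39,
         'V0': 11.0, 'Zsurf': 1.2, 'Texp': 300.0, 'rmax': 8.0}
@endcode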
\subsection sec_task_scan Scan
The _scan_ attribute is an index into the list of scans defined by the user project.
Each scan refers to one experimental data file and, thus, defines the initial and final states of the photoelectron.
PMSCO runs a separate calculation for each scan file and compares the combined results to the experimental data.
This is sometimes called a _global fit_.
\subsection sec_task_symmetry Symmetry
A _symmetry_ is a discrete variant of a set of calculation parameters (including the atomic cluster)
that is independent of the _model_ and contributes incoherently to the measured diffraction pattern.
For instance, for a system that includes two inequivalent structural domains,
two separate clusters have to be generated and calculated for each model.
The symmetry parameter is not subject to optimization.
However, if the branching ratio is unknown a priori, a model parameter can be introduced
to control the relative contribution of a particular symmetry to the diffraction pattern.
In that case, the @ref pmsco.project.Project.combine_symmetries method must be overridden.
A symmetry is identified by its index which is an index into the project's symmetries table (pmsco.project.Project.symmetries).
It is up to the user project to give a physical description of the symmetry, e.g. a rotation angle,
by assigning a meaningful value (e.g. a dictionary with key-value pairs) to the symmetries table.
The cluster generator can then read the value from the table rather than from constants in the code.
The figure shows two examples of symmetry parameters.
The corresponding symmetry table could be set up like this:
@code{.py}
project.symmetries.append({'rotation': 0.0, 'registry': 0.0})
project.symmetries.append({'rotation': 30.0, 'registry': 0.0})
@endcode
\subsection sec_task_emitter Emitter
The _emitter_ component of the calculation task selects a specific emitter configuration of the cluster generator.
This is merely an index whose interpretation is up to the cluster generator.
The default emitter handler enumerates the emitter index from 1 to the emitter count reported by the cluster generator.
The emitter count and list of emitters may depend on model, scan and symmetry.
The cluster generator can tailor a cluster to the given model, scan, symmetry and emitter index.
For example, in a large unit cell with many inequivalent emitters,
the generator might return a small sub-cluster around the actual emitter for better calculation performance
since the distant atoms of the unit cell do not contribute to the diffraction pattern.
Emitter branching must be requested specifically by using a particular pattern in the code.
By default, it is disabled, which allows the cluster code to be written in a slightly easier way.
\subsection sec_task_region Region
The _region_ handler may split a scan region into several smaller chunks
so that the tasks can be distributed to multiple processes.
Chunking by energy regions is enabled automatically if the project contains an energy scan of at least 10 points
and the project is run in multiple processes.
It can be disabled by the user project.
\section sec_task_handler Task handlers
The previous section described the five important attributes of a calculation task.
These attributes span a five-dimensional index space
where each point maps to one task and, consequently, one calculation and one result dataset.
To populate the index space, however, calculation tasks are more adequately arranged in a tree-like hierarchy with five levels.
The code that defines attributes and processes results can then be separated into _handlers_.
Each level calls for a particular functional contract of the handler.
According to object-oriented principles the contracts at the five levels are defined by abstract base classes
which can be sub-classed for more specific behaviour.
For instance, the class of the model handler is chosen based on the execution mode (single, grid, swarm, etc.).
Though it is possible for a project to define its own handlers,
the PMSCO core declares handlers that should cover most calculation scenarios.
The following diagram shows the tree of calculation tasks and how handlers act on the task objects to populate the task attributes.
At the top of the tree, an empty task object (all attributes undefined) is fed into the model level handler which takes care of the model attribute.
The model handler generates a number of sub-tasks, one for each set of model parameters.
Each of these (incompletely defined) tasks is then passed to the next handler, and so on.
@startuml "calculation task hierarchy and task handler stack"
object "Root: CalculationTask" as Root {
index = (-1,-1,-1,-1,-1)
}
note right: all attributes undefined
object "Model: CalculationTask" as Model {
index = (i,-1,-1,-1,-1)
model
}
note right: model is defined\nother attributes undefined
object ModelHandler
object "Scan: CalculationTask" as Scan {
index = (i,j,-1,-1,-1)
model
scan
}
object ScanHandler
object "Sym: CalculationTask" as Sym {
index = (i,j,k,-1,-1)
model
scan
symmetry
}
object "SymmetryHandler" as SymHandler
object "Emitter: CalculationTask" as Emitter {
index = (i,j,k,l,-1)
model
scan
symmetry
emitter
}
object EmitterHandler
object "Region: CalculationTask" as Region {
index = (i,j,k,l,m)
model
scan
symmetry
emitter
region
}
note right: all attributes well-defined
object RegionHandler
Root "1" o.. "1..*" Model
Model "1" o.. "1..*" Scan
Scan "1" o.. "1..*" Sym
Sym "1" o.. "1..*" Emitter
Emitter "1" o.. "1..*" Region
(Root, Model) .. ModelHandler
(Model, Scan) .. ScanHandler
(Scan, Sym) .. SymHandler
(Sym, Emitter) .. EmitterHandler
(Emitter, Region) .. RegionHandler
@enduml
At the end of the stack, the tasks are fully specified and are passed to the calculation queue.
They are dispatched to the available processes of the MPI environment in which PMSCO was started,
which allows calculations to be run in parallel.
Only at this point, after the model has been broken down into multiple fully specified tasks,
are the cluster and input files generated and the calculation program started.
At the end of a calculation, the output files are associated with their original task objects,
and the tasks are passed back through the task handler stack.
In this phase, each level joins the datasets from the sub-tasks to the data requested by the parent task.
For example, at the lowest level, one result file is present for each region.
The region handler gathers all files that correspond to the same parent task
(i.e. have the same emitter, symmetry, scan and model attributes),
joins them to one file which includes all regions,
links the file to the parent task and passes the result to the next higher level.
On the top level, the model handler compares the result to the experimental data.
Depending on the operation mode, it refines the model parameters and issues new tasks by passing them down the stack.
When the optimization is finished (according to a set of defined criteria),
the model handler returns the root task to the caller, which causes PMSCO to exit.
*/


@ -1,153 +1,85 @@
/*! @page pag_concepts Design Concepts
\section sec_tasks Tasks
/*! @page pag_concepts Design
In an optimization project, a number of optimizable, high-level parameters generated by the optimization algorithm
must be mapped to the input parameters and atomic coordinates before the calculation program is executed.
Possibly, the calculation program is executed multiple times for inequivalent domains, emitters or scan geometries.
After the calculation, the output is collected, compared to the experimental data, and the model is refined.
In PMSCO, the optimization is broken down into a set of _tasks_ and assigned to a stack of task _handlers_ according to the following figure.
Each invocation of the scattering program (EDAC) runs a specific task,
i.e. a calculation for a set of specific parameters, a fully-qualified cluster of atoms, and a specific angle and/or energy scan.
\section sec_components Components
\dotfile tasks.dot "PMSCO task stack"
The code for a PMSCO job consists of the following components.
At the root, the _model handler_ proposes models that need to be calculated according to the operation mode specified at the command line.
A _model_ is the minimum set of variable parameters in the context of a custom project.
Other parameters that will not vary under optimization are set directly by the project code.
The model handler may generate models based on a fixed scheme, e.g. on a grid, or based on R-factors of previous results.
@startuml "top-level components of scattering and optimization code"
For each model, one task is passed to the task handling chain, starting with the scan handler.
The _scan handler_ generates sub-tasks for each experimental scan dataset.
This way, the model can be optimized for multiple experimental scans in the same run (see Sec. \ref sec_scanning).
skinparam componentStyle uml2
The _symmetry handler_ generates sub-tasks based on the number of symmetries contained in the experimental data (see Sec. \ref sec_symmetry).
For instance, for a system that includes two inequivalent structural domains, two separate calculations have to be run for each model.
The symmetry handler is implemented on the project level and may be customized for a specific system.
component "project" as project
component "PMSCO" as pmsco
component "scattering code\n(calculator)" as calculator
The _emitter handler_ generates a sub-task for each inequivalent emitter atom
so that the tasks can be distributed to multiple processes (see Sec. \ref sec_emitters).
In a single-process environment, all emitters are calculated in one task.
interface "command line" as cli
interface "input files" as input
interface "output files" as output
interface "experimental data" as data
interface "results" as results
The _region handler_ may split a scan region into several smaller chunks
so that the tasks can be distributed to multiple processes.
With EDAC, only energy scans can benefit from chunking
since it always calculates the full angular distribution.
This layer has to be enabled specifically in the project module.
It is disabled by default.
data -> project
project ..> pmsco
pmsco ..> calculator
cli --> project
input -> calculator
calculator -> output
pmsco -> results
At the end of the stack, the tasks are fully specified and are passed to the calculation queue.
They are dispatched to the available processes of the MPI environment in which PMSCO was started,
which allows calculations to be run in parallel.
Only now that the model is broken down into multiple tasks,
the cluster and input files are generated, and the calculation program is started.
At the end of a calculation, the output is passed back through the task handler stack.
In this phase, each level gathers the datasets from the sub-tasks to the data requested by the parent task
and passes the result to the next higher level.
On the top level, the calculation is compared to the experimental data.
Depending on the operation mode, the model parameters are refined, and new tasks issued.
If the optimization is finished according to a set of defined criteria, PMSCO exits.
As an implementation detail, each task is given a unique _identifier_ consisting of five integer numbers
which correspond to the five levels model, scan, symmetry, emitter and region.
The identifier appears in the file names in the communication with the scattering program.
Normally, the data files are deleted after the calculation, and only a few top-level files are kept
(can be overridden at the command line or in the project code).
At the top level, only the model ID is set, the other ones are undefined (-1).
@enduml
\section sec_symmetry Symmetry and Domain Averaging
The _project_ consists of program code, system and experimental parameters
that are specific to a particular experiment and calculation job.
The project code reads experimental data, defines the parameter dictionary of the model,
and contains code to generate the cluster, parameter and phase files for the scattering code.
The project is also the main entry point of process execution.
A _symmetry_ under PMSCO is a discrete variant of a set of calculation parameters (including the atomic cluster)
that is derived from the same set of model parameters
and that contributes incoherently to the measured diffraction pattern.
A symmetry may be represented by a special symmetry parameter which is not subject to optimization.
The _scattering code_ on the other hand is a static calculation engine
which accepts detailed input files
(parameters, atomic coordinates, emitter specification, scattering phases)
and outputs an intensity distribution of photoelectrons versus energy and/or angle.
For instance, a real sample may have additional rotational domains that are not present in the cluster,
increasing the symmetry from three-fold to six-fold.
Or, an adsorbate may be present in a number of different lateral configurations on the substrate.
In the first case, it may be sufficient to fold calculated data in the proper way to generate the same symmetry as in the measurement.
In the latter case, it may be necessary to execute a scattering calculation for each possible orientation or a representative number of possible orientations.
PMSCO provides the basic framework to spawn multiple calculations according to the number of symmetries (cf. \ref sec_tasks).
The actual data reduction from multiple symmetries to one measurement needs to be implemented on the project level.
This section explains the necessary steps.
1. Your project needs to populate the pmsco.project.Project.symmetries list.
For each symmetry, add a dictionary of symmetry parameters, e.g. <code>{'angle_azi': 15.0}</code>.
There must be at least one symmetry in a project, otherwise no calculation is executed.
2. The project may apply the symmetry of a task to the cluster and parameter file if necessary.
The pmsco.project.Project.create_cluster and pmsco.project.Project.create_params methods receive the index of the particular symmetry in addition to the model parameters.
3. The project combines the results of the calculations for the various symmetries into one dataset that can be compared to the measurement.
The default method implemented in pmsco.project.Project just adds up all calculations with equal weight.
If you need more control, you need to override the pmsco.project.Project.combine_symmetries method and implement your own algorithm.
The _PMSCO core_ interfaces between the project and the calculator.
It carries out the structural optimization and manages the calculation tasks.
It generates and sends input files to the calculator and reads back the output.
\section sec_scanning Scanning
\section sec_control_flow Control flow
PMSCO with EDAC currently supports the following scan axes.
The basic control flow of an optimization job is depicted schematically in the following figure.
- kinetic energy E
- polar angle theta T
- azimuthal angle phi P
- analyser angle alpha A
@startuml "top-level activity diagram"
The following combinations of these scan axes are allowed (see pmsco.data.SCANTYPES).
start
:initialize;
:import experimental data;
repeat
:define tasks;
fork
:calculate\ntask 1;
fork again
:calculate\ntask N;
end fork
:evaluate results;
repeat while
-> [finished];
:report results;
- E
- E-T
- E-A
- T-P (hemispherical or hologram scan)
stop
@attention The T and A axes cannot be combined.
If a scan of one of them is specified, the other is assumed to be fixed at zero!
This assumption may change in the future,
so it is best to explicitly set the fixed angle to zero in the scan file.
@enduml
@remark According to the measurement geometry at PEARL,
alpha scans are implemented in EDAC as theta scans at phi = 90 in fixed cluster mode.
The switch to fixed cluster mode is made by PMSCO internally,
no change of angles or other parameters is necessary in the scan or project files
besides filling the alpha instead of the theta column.
After importing experimental data and setting up the model dictionary and job parameters,
the calculation tasks are defined depending on the execution mode and system setup.
Each task consists of a specific set of model, experimental and calculation parameters
that describe an independent calculation step,
while several steps may be required to produce a dataset that can be compared to the experimental data.
The idea is that tasks can be defined quickly
and that the time-consuming operations are dispatched to slave processes which can run in parallel.
\section sec_emitters Emitter Configurations
Since emitters contribute incoherently to the diffraction pattern,
it should make no difference how the emitters are grouped and calculated.
EDAC allows specifying multiple emitters in one calculation.
However, running EDAC multiple times for single-emitter configurations and simply summing up the results
gives the same final diffraction pattern with no significant difference in CPU time.
It is, thus, easy to distribute the emitters over parallel processes in a multi-process environment.
PMSCO can handle this transparently with a minimal effort.
Within the same framework, PMSCO also supports that clusters are tailored to a specific emitter configuration.
Suppose that the unit cell contains a large number of inequivalent emitters.
If all emitters had to be included in a single calculation,
the cluster would grow very large and the calculation would take a long time
because it would include many long scattering paths
that effectively do not contribute intensity to the final result.
Using single-emitters, a cluster can be built locally around the emitter and kept to a reasonable size.
Even when using this feature, PMSCO does not require that each configuration contains only one emitter.
The term _emitter_ effectively means _emitter configuration_.
A configuration can include multiple emitters which will not be broken up further.
It is up to the project, what is included in a particular configuration.
To enable emitter handling,
1. override the count_emitters method of your cluster generator
and return the number of emitter configurations of a given model, scan and symmetry.
2. handle the emitter index in your create_cluster method.
3. (optionally) override the pmsco.project.Project.combine_emitters method
if the emitters should not be added with equal weights.
For implementation details see the respective method descriptions.
As soon as all necessary results are available they are combined into one dataset and compared to the experimental data.
Depending on the execution mode, the process of task definition and calculation repeats until the model has converged
or the calculations are stopped for another reason.
*/


@ -14,26 +14,29 @@ Run PMSCO from the command prompt:
@code{.sh}
cd work-dir
python project-dir/project.py [pmsco-arguments] [project-arguments]
python pmsco-dir project-dir/project.py [pmsco-arguments] [project-arguments]
@endcode
where <code>work-dir</code> is the destination directory for output files,
<code>pmsco-dir</code> is the directory containing the <code>__main__.py</code> file,
<code>project.py</code> is the specific project module,
and <code>project-dir</code> is the directory where the project file is located.
PMSCO is run in one process which handles all calculations sequentially.
The command line arguments are usually divided into common arguments interpreted by the main pmsco code (pmsco.py),
The command line arguments are divided into common arguments interpreted by the main pmsco code (pmsco.py),
and project-specific arguments interpreted by the project module.
However, it is ultimately up to the project module how the command line is interpreted.
Example command line for a single EDAC calculation of the two-atom project:
@code{.sh}
cd work/twoatom
python pmsco/projects/twoatom/twoatom.py -s ea -o twoatom-demo -m single
python ../../pmsco ../../projects/twoatom/twoatom.py -s ea -o twoatom-demo -m single
@endcode
The project file <code>twoatom.py</code> takes the lead of the project execution.
Usually, it contains only project-specific code and delegates common tasks to the main pmsco code.
This command line executes the main pmsco module <code>pmsco.py</code>.
The main module loads the project file <code>twoatom.py</code> as a plug-in
and starts processing the common arguments.
The <code>twoatom.py</code> module contains only project-specific code
with several defined entry-points called from the main module.
In the command line above, the <code>-o twoatom-demo</code> and <code>-m single</code> arguments
are interpreted by the pmsco module.
@ -61,7 +64,7 @@ For optimum performance, the number of processes should not exceed the number of
To start a two-hour optimization job with multiple processes on a quad-core workstation with hyperthreading:
@code{.sh}
cd work/my_project
mpiexec -np 8 project-dir/project.py -o my_job_0001 -t 2 -m swarm
mpiexec -np 8 pmsco-dir/pmsco project-dir/project.py -o my_job_0001 -t 2 -m swarm
@endcode
@ -84,4 +87,4 @@ bin/qpmsco.ra.sh my_job_0001 1 8 2 projects/my_project/project.py swarm
Be sure to consider the resource allocation policy of the cluster
before you decide on the number of processes.
Requesting less resources will prolong the run time but might increase the scheduling priority.
*/
*/


@ -3,60 +3,71 @@
\subsection sec_general General Remarks
The PMSCO code is maintained under git.
The PMSCO code is maintained under [Git](https://git-scm.com/).
The central repository for PSI-internal projects is at https://git.psi.ch/pearl/pmsco,
the public repository at https://gitlab.psi.ch/pearl/pmsco.
For their own developments, users should clone the repository.
Changes to common code should be submitted via pull requests.
\subsection sec_requirements Requirements
The program code of PMSCO and its external programs is written in Python, C++ and Fortran.
The code will run in any recent Linux environment on a workstation or in a virtual machine.
Scientific Linux, CentOS7, [Ubuntu](https://www.ubuntu.com/)
and [Lubuntu](http://lubuntu.net/) (recommended for virtual machine) have been tested.
For optimization jobs, a workstation with at least 4 processor cores
or cluster with 20-50 available processor cores is recommended.
The program requires about 2 GB of RAM per process.
The recommended IDE is [PyCharm (community edition)](https://www.jetbrains.com/pycharm).
The documentation in [Doxygen](http://www.stack.nl/~dimitri/doxygen/index.html) format is part of the source code.
The Doxygen compiler can generate separate documentation in HTML or LaTeX.
The MSC and EDAC codes compile with the GNU Fortran and C++ compilers on Linux.
Other compilers may work but have not been tested.
The code will run in any recent Linux environment on a workstation or in a virtual machine.
Scientific Linux, CentOS7, [Ubuntu](https://www.ubuntu.com/)
and [Lubuntu](http://lubuntu.net/) (recommended for virtual machine) have been tested.
For optimization jobs, a high-performance cluster with 20-50 available processor cores is recommended.
The code requires about 2 GB of RAM per process.
Please note that it may be important that the code remains compatible with earlier compiler and library versions.
Newer compilers or the latest versions of the libraries contain features that will break the compatibility.
The code can be used with newer versions as long as they are backward compatible.
\subsection sec_requirements Requirements
Please note that in some environments (particularly shared high-performance machines)
it may be important to choose specific compiler and library versions.
In order to maintain backward compatibility with some of these older machines,
code that requires new versions of compilers and libraries should be introduced carefully.
The code depends on the following libraries:
- GCC 4.8
- OpenMPI 1.10
- GCC >= 4.8
- OpenMPI >= 1.10
- F2PY
- F2C
- SWIG
- Python 2.7 (incompatible with Python 3.0)
- Numpy 1.11 (incompatible with Numpy 1.13 and later)
- MPI4PY (from PyPI)
- BLAS
- LAPACK
- periodictable
- Python 2.7 or 3.6
- Numpy >= 1.11
- Python packages from PyPI listed in the requirements.txt file
Most of these requirements are available from the Linux distribution.
For an easily maintainable Python environment, [Miniconda](https://conda.io/miniconda.html) is recommended.
The Python environment distributed with the OS often contains outdated packages,
and it's difficult to switch between different Python versions.
Most of these requirements are available from the Linux distribution, or from PyPI (pip install), respectively.
If there are any issues with the packages installed by the distribution, try the ones from PyPI
(e.g. there is currently a bug in the Debian mpi4py package).
The F2C source code is contained in the repository for machines which don't have it installed.
On the PSI cluster machines, the environment must be set using the module system and conda (on Ra).
Details are explained in the PEARL Wiki.
\subsubsection sec_install_ubuntu Installation on Ubuntu 16.04
PMSCO runs under Python 2.7 or Python 3.6 or higher.
Since Python 2 is being deprecated, the code has been ported to Python 3.6.
Compatibility with Python 2.7 is maintained by using
the [future package](http://python-future.org/compatible_idioms.html).
New code should be written according to their guidelines.
The following instructions install the necessary dependencies on Ubuntu (or Lubuntu 16.04):
\subsection sec_install_instructions Instructions
\subsubsection sec_install_ubuntu Installation on Ubuntu
The following instructions install the necessary dependencies on Ubuntu, Debian or related distributions.
The Python environment is provided by [Miniconda](https://conda.io/miniconda.html).
@code{.sh}
sudo apt-get update
sudo apt update
sudo apt-get install \
sudo apt install \
binutils \
build-essential \
doxygen \
@ -67,38 +78,114 @@ gcc \
gfortran \
git \
graphviz \
ipython \
libblas-dev \
liblapack-dev \
libopenmpi-dev \
make \
nano \
openmpi-bin \
openmpi-common \
python-all \
python-mock \
python-nose \
python-numpy \
python-pip \
python-scipy \
python2.7-dev \
swig
sqlite3 \
swig \
wget
@endcode
sudo pip install --system mpi4py periodictable
On systems where the link to libblas is missing (see @ref sec_compile below),
the following lines are necessary.
@code{.sh}
cd /usr/lib
sudo ln -s /usr/lib/libblas/libblas.so.3 libblas.so
@endcode
The following instructions install the PyCharm IDE and a few other useful utilities:
Install Miniconda according to their [instructions](https://conda.io/docs/user-guide/install/index.html),
then configure the Python environment:
@code{.sh}
sudo sh -c 'echo "deb http://archive.getdeb.net/ubuntu xenial-getdeb apps" >> /etc/apt/sources.list.d/getdeb.list'
wget -q -O - http://archive.getdeb.net/getdeb-archive.key | sudo apt-key add -
sudo apt-get update
sudo apt-get install \
conda create -q --yes -n pmsco python=2.7
source activate pmsco
conda install -q --yes -n pmsco \
pip \
numpy \
scipy \
ipython \
mpi4py \
matplotlib \
nose \
mock \
future \
statsmodels
pip install periodictable attrdict fasteners
@endcode
\subsubsection sec_install_singularity Installation in Singularity container
A [Singularity](https://www.sylabs.io/guides/2.5/user-guide/index.html) container
contains all OS and Python dependencies for running PMSCO.
Besides the Singularity executable, nothing else needs to be installed in the host system.
This may be the fastest way to get PMSCO running.
For installation of Singularity,
see their [user guide](https://www.sylabs.io/guides/2.5/user-guide/installation.html).
On newer Linux systems (e.g. Ubuntu 18.04), Singularity is available from the package manager.
Installation in a virtual machine on Windows or Mac is straightforward
thanks to the [Vagrant system](https://www.vagrantup.com/).
After installing Singularity,
check out PMSCO as explained in the @ref sec_compile section:
@code{.sh}
cd ~
mkdir containers
git clone git@git.psi.ch:pearl/pmsco.git pmsco
cd pmsco
git checkout master
git checkout -b my_branch
@endcode
Then, either copy a pre-built container into `~/containers`,
or build one from a script provided by the PMSCO repository:
@code{.sh}
cd ~/containers
sudo singularity build pmsco.simg ~/containers/pmsco/extras/singularity/singularity_python2
@endcode
To work with PMSCO, start an interactive shell in the container and switch to the pmsco environment.
Note that the PMSCO code is outside the container and can be edited with the usual tools.
@code{.sh}
cd ~/containers
singularity shell pmsco.simg
source activate pmsco
cd ~/containers/pmsco
make all
nosetests -w tests/
@endcode
Or call PMSCO from outside:
@code{.sh}
cd ~/containers
mkdir output
cd output
singularity run ../pmsco.simg python ~/containers/pmsco/pmsco path/to/your-project.py arg1 arg2 ...
@endcode
For parallel processing, prepend `mpirun -np X` to the singularity command as needed.
\subsubsection sec_install_extra Additional Applications
For working with the code and data, some other applications are recommended.
The PyCharm IDE can be installed from the Ubuntu software center.
The following commands install other useful helper applications:
@code{.sh}
sudo apt install \
avogadro \
gitg \
meld \
openjdk-9-jdk \
pycharm
meld
@endcode
To produce documentation in PDF format (not recommended on virtual machine), install LaTeX:
@ -124,15 +211,18 @@ Private key authentication is usually recommended except on shared computers.
Clone the code repository using one of these repository addresses and switch to the desired branch:
@code{.sh}
cd ~
git clone git@git.psi.ch:pearl/pmsco.git pmsco
cd pmsco
git checkout master
git checkout -b my_branch
@endcode
Compilation of the various modules is started by <code>make all</code>;
this step is necessary only once after installation.
Compile the code and run the unit tests to check that everything works:
@code{.sh}
make all
nosetests -w tests/
@endcode
If the compilation of _loess.so fails due to a missing BLAS library,
try to set a link to the BLAS library (the actual file names may vary depending on the distribution or version).
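The link command is the same as in the Ubuntu installation section above:
@code{.sh}
cd /usr/lib
sudo ln -s /usr/lib/libblas/libblas.so.3 libblas.so
@endcode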
Re-check from time to time by re-running the unit tests:
@code{.sh}
cd ~/pmsco
nosetests -w tests/
@endcode
Run the twoatom project to check the compilation of the calculation programs.
@code{.sh}
mkdir work
cd work
mkdir twoatom
cd twoatom/
nice python ~/pmsco/pmsco ~/pmsco/projects/twoatom/twoatom.py -s ea -o twoatom_energy_alpha -m single
@endcode
Runtime warnings may appear because the twoatom project does not contain experimental data.
To learn more about running PMSCO, see @ref pag_run.
*/

PMSCO is written in Python 2.7.
EDAC is written in C++, MSC in Fortran.
PMSCO interacts with the calculation programs through Python wrappers for C++ or Fortran.
The MSC and EDAC source code is contained in the same software repository.
\section sec_intro_highlights Highlights
\section sec_intro_start Getting Started
- @ref pag_concepts
- @ref pag_concepts_tasks
- @ref pag_concepts_emitter
- @ref pag_install
- @ref pag_run
- @ref pag_command
\section sec_license License Information
An open distribution of PMSCO is available under the [Apache License, Version 2.0](http://www.apache.org/licenses/LICENSE-2.0) at <https://gitlab.psi.ch/pearl-public/pmsco>.
- Please read and respect the respective license agreements.
- Please acknowledge the use of the code.
- Please share your development of the code with the original author.
Due to different copyright terms, the MSC and EDAC programs are not contained in the public software repository.
These programs may not be used without an explicit agreement by the respective original authors.
\author Matthias Muntwiler, <mailto:matthias.muntwiler@psi.ch>
\version This documentation is compiled from version $(REVISION).
\copyright 2015-2018 by [Paul Scherrer Institut](http://www.psi.ch)
\copyright Licensed under the [Apache License, Version 2.0](http://www.apache.org/licenses/LICENSE-2.0)
*/

/*! @page pag_opt Model optimizers
\section sec_opt Model optimizers
\subsection sec_opt_swarm Particle swarm
The particle swarm algorithm is adapted from
D. A. Duncan et al., Surface Science 606, 278 (2012).
The general parameters of the particle swarm algorithm are specified in the @ref Project.optimizer_params dictionary.
Some of them can be changed on the command line.
| Parameter | Command line | Range | Description |
| --- | --- | --- | --- |
| pop_size | --pop-size | &ge; 1 | Number of models (particles) per generation, calculated in parallel. |
| position_constrain_mode | | default bounce | Resolution of domain limit violations. |
| seed_file | --seed-file | a file path, default none | |
| seed_limit | --seed-limit | 0..pop_size | |
| rfac_limit | | 0..1, default 0.8 | Accept only seed values that have a lower R-factor. |
| recalc_seed | | True or False, default True | |
The domain parameters have the following meanings:
| Parameter | Description |
| --- | --- |
| start | Seed model. The start values are copied into particle 0 of the initial population. |
| min | Lower limit of the parameter range. |
| max | Upper limit of the parameter range. |
| step | Not used. |
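The following is a minimal sketch of a user project configuring the particle swarm.
The attribute and method names follow the tables above and the class diagrams in this documentation;
the module path, the mode string and the parameter names are assumptions that should be checked against the code.
@code{.py}
# minimal sketch, not a verified example: module path, mode string and values are assumptions
from pmsco.project import Project, Domain

class MyProject(Project):
    def __init__(self):
        super(MyProject, self).__init__()
        self.mode = "swarm"                              # assumed mode string for the particle swarm
        self.optimizer_params['pop_size'] = 20           # particles per generation
        self.optimizer_params['seed_file'] = "seed.dat"  # optional: seed from a previous run

    def create_domain(self):
        # create_cluster() and create_params() are omitted in this sketch
        dom = Domain()
        # 'start' seeds particle 0 of the initial population; 'step' is not used by the swarm
        dom.add_param('dAS', start=2.05, min=1.90, max=2.20, step=0.05)
        return dom
@endcode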
\subsection sec_opt_genetic Genetic optimization
The genetic algorithm evolves a population of individuals
by a combination of inheritance, crossover, mutation
and selection in analogy to biological evolution.
The _genes_ are in this case the model parameters,
and selection occurs based on R-factor.
The genetic algorithm is adapted from
D. A. Duncan et al., Surface Science 606, 278 (2012).
It is implemented in the @ref pmsco.optimizers.genetic module.
The genetic optimization is helpful in the first stage of an optimization,
where a large parameter space needs to be sampled
and fast convergence on a small region of the parameter space is undesirable
because the algorithm might get caught in a local optimum.
On the other hand, convergence near the optimum is slower than in the particle swarm.
The genetic optimization should be run with a large number of iterations
rather than a large population size.
The general parameters of the genetic algorithm are specified in the @ref Project.optimizer_params dictionary.
Some of them can be changed on the command line.
| Parameter | Command line | Range | Description |
| --- | --- | --- | --- |
| pop_size | --pop-size | &ge; 1 | Number of models (individuals) per generation, calculated in parallel. |
| mating_factor | | 1..pop_size, default 4 | |
| strong_mutation_probability | | 0..1, default 0.01 | Probability that a parameter undergoes a strong mutation. |
| weak_mutation_probability | | 0..1, default 1 | Probability that a parameter undergoes a weak mutation. This parameter should be left at 1. Lower values tend to produce discrete parameter values. Weak mutations can be tuned by the step domain parameter. |
| position_constrain_mode | | default random | Resolution of domain limit violations. |
| seed_file | --seed-file | a file path, default none | |
| seed_limit | --seed-limit | 0..pop_size | |
| rfac_limit | | 0..1, default 0.8 | Accept only seed values that have a lower R-factor. |
| recalc_seed | | True or False, default True | |
The domain parameters have the following meanings:
| Parameter | Description |
| --- | --- |
| start | Seed model. The start values are copied into particle 0 of the initial population. |
| min | Lower limit of the parameter range. |
| max | Upper limit of the parameter range. |
| step | Standard deviation of the Gaussian distribution of weak mutations. The step should not be much smaller than the parameter range divided by the population size, and not greater than one third of the parameter range. |
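As a hedged sketch (names as in the tables above; the exact interface should be checked against the @ref pmsco.optimizers.genetic module),
a user project might tune the genetic algorithm like this:
@code{.py}
# sketch only: attribute names follow the tables above, values are illustrative
def configure_genetic(project, domain):
    """Set typical genetic-algorithm parameters on a Project instance and its Domain."""
    project.mode = "genetic"                                        # assumed mode string
    project.optimizer_params['pop_size'] = 15
    project.optimizer_params['strong_mutation_probability'] = 0.01  # rare re-randomization of a parameter
    project.optimizer_params['weak_mutation_probability'] = 1.0     # keep at 1; tune the width via 'step'
    # 'step' is the standard deviation of weak mutations:
    # not much smaller than (max - min) / pop_size and at most (max - min) / 3
    domain.add_param('dAS', start=2.05, min=1.90, max=2.20, step=0.03)
@endcode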
\subsection sec_opt_grid Grid search
The grid search algorithm samples the parameter space at equidistant steps.
The order of calculations is randomized so that early results represent various parts of the parameter space.
| Parameter | Description |
| --- | --- |
| start | Values of fixed parameters. |
| min | Lower limit of the parameter range. |
| max | Upper limit of the parameter range. If abs(max - min) < step/2, the parameter is kept constant. |
| step | Step size (distance between two grid points). If step <= 0, the parameter is kept constant. |
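For illustration (assuming the Domain.add_param interface shown in the class diagrams below, with placeholder parameter names),
a grid over one parameter with a second parameter held fixed could be defined as:
@code{.py}
# illustrative sketch: parameter names and values are placeholders
dom = Domain()
dom.add_param('dAS', start=2.00, min=1.80, max=2.20, step=0.05)  # 9 grid points from 1.80 to 2.20
dom.add_param('V0', start=10.0, min=10.0, max=10.0, step=0.0)    # step <= 0: kept constant at 'start'
@endcode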
\subsection sec_opt_gradient Gradient search
Currently not implemented.
\subsection sec_opt_table Table scan
The table scan calculates models from an explicit table of model parameters.
It can be used to recalculate models from a previous optimization run on other experimental data,
as an interface to external optimizers,
or as a simple input of manually edited model parameters.
The table can be stored in an external file that is specified on the command line,
or supplied in one of several forms by the custom project class.
The table can be left unchanged during the calculations,
or new models can be added on the go.
@attention It is not easily possible to know when and which models have been read from the table file. If you modify the table file while the calculations are running, observe the following hints (a minimal sketch of a safe append follows the list):
1. The file on disk must not be locked for more than a second. Do not keep the file open unnecessarily.
2. _Append_ new models to the end of the table rather than overwriting previous ones. Otherwise, some models may be lost before they have been calculated.
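A minimal sketch of a safe append; the file name and column layout are hypothetical and must match whatever your table reader expects:
@code{.py}
# hedged sketch: append one model row and release the file immediately
row = [21, 2.05, 10.0]                      # e.g. a model index and parameter values (placeholders)
with open('models.tab', 'a') as f:          # append only, do not overwrite earlier rows
    f.write('\t'.join(str(v) for v in row) + '\n')
# the file is closed right away, so it is not locked for more than a moment
@endcode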
The general parameters of the table scan are specified in the @ref Project.optimizer_params dictionary.
Some of them can be changed on the command line or in the project class (depending on how the project class is implemented).
| Parameter | Command line | Range | Description |
| --- | --- | --- | --- |
| pop_size | --pop-size | &ge; 1 | Number of models in a generation (calculated in parallel). In table mode, this parameter is not so important and can be left at the default. It has nothing to do with table size. |
| table_file | --table-file | a file path, default none | |
The domain parameters have the following meanings.
Models that violate the parameter range are not calculated.
| Parameter | Description |
| --- | --- |
| start | Not used. |
| min | Lower limit of the parameter range. |
| max | Upper limit of the parameter range. |
| step | Not used. |
\subsection sec_opt_single Single model
The single model optimizer calculates the model defined by domain.start.
| Parameter | Description |
| --- | --- |
| start | Values of model parameters. |
| min | Not used. |
| max | Not used. |
| step | Not used. |
*/

@startuml
class CalculationTask {
id : CalcID
parent : CalcID
model : dict
file_root : str
file_ext : str
result_filename : str
modf_filename : str
result_valid : bool
time : datetime.timedelta
files : dict
region : dict
__init__()
__eq__()
__hash__()
copy()
change_id()
format_filename()
get_mpi_message()
set_mpi_message()
add_task_file()
rename_task_file()
remove_task_file()
}
class CalcID {
model
scan
sym
emit
region
}
CalculationTask *-- CalcID
@enduml

@startuml
object Root {
id = -1, -1, -1, -1, -1
parent = -1, -1, -1, -1, -1
model = {}
}
Root o.. Model1
Root o.. Model2
object Model1 {
id = 1, -1, -1, -1, -1
parent = -1, -1, -1, -1, -1
model = {'d': 5}
}
object Model2 {
id = 2, -1, -1, -1, -1
parent = -1, -1, -1, -1, -1
model = {'d': 7}
}
Model1 o.. Scan11
Model1 o.. Scan12
Model2 o.. Scan21
object Scan11 {
id = 1, 1, -1, -1, -1
parent = 1, -1, -1, -1, -1
model = {'d': 5}
}
object Scan12 {
id = 1, 2, -1, -1, -1
parent = 1, -1, -1, -1, -1
model = {'d': 5}
}
object Scan21 {
id = 2, 1, -1, -1, -1
parent = 2, -1, -1, -1, -1
model = {'d': 7}
}
Scan11 o.. Sym111
object Sym111 {
id = 1, 1, 1, -1, -1
parent = 1, 1, -1, -1, -1
model = {'d': 5}
}
Sym111 o.. Emitter1111
object Emitter1111 {
id = 1, 1, 1, 1, -1
parent = 1, 1, 1, -1, -1
model = {'d': 5}
}
Emitter1111 o.. Region11111
object Region11111 {
id = 1, 1, 1, 1, 1
parent = 1, 1, 1, 1, -1
model = {'d': 5}
}
@enduml
@startuml
object "Root: CalculationTask" as Root {
}
note right: all attributes undefined
object "Model: CalculationTask" as Model {
model
}
note right: model is defined\nother attributes undefined
object ModelHandler
object "Scan: CalculationTask" as Scan {
model
scan
}
object ScanHandler
object "Sym: CalculationTask" as Sym {
model
scan
symmetry
}
object "SymmetryHandler" as SymHandler
object "Emitter: CalculationTask" as Emitter {
model
scan
symmetry
emitter
}
object EmitterHandler
object "Region: CalculationTask" as Region {
model
scan
symmetry
emitter
region
}
note right: all attributes well-defined
object RegionHandler
Root "1" o.. "1..*" Model
Model "1" o.. "1..*" Scan
Scan "1" o.. "1..*" Sym
Sym "1" o.. "1..*" Emitter
Emitter "1" o.. "1..*" Region
(Root, Model) .. ModelHandler
(Model, Scan) .. ScanHandler
(Scan, Sym) .. SymHandler
(Sym, Emitter) .. EmitterHandler
(Emitter, Region) .. RegionHandler
@enduml

@startuml
class CalculationTask {
model
scan
symmetry
emitter
region
..
files
}
class Model {
index
..
dlat
dAS
dS1S2
V0
Zsurf
Texp
rmax
}
class Scan {
index
..
filename
mode
initial_state
energies
thetas
phis
alphas
}
class Symmetry {
index
..
rotation
registry
}
class Emitter {
index
}
class Region {
index
..
range
}
CalculationTask *-- Model
CalculationTask *-- Scan
CalculationTask *-- Symmetry
CalculationTask *-- Emitter
CalculationTask *-- Region
class Project {
scans
symmetries
model_handler
cluster_generator
}
class ClusterGenerator {
count_emitters()
create_cluster()
}
class ModelHandler {
create_tasks()
add_result()
}
Model ..> ModelHandler
Scan ..> Project
Symmetry ..> Project
Emitter ..> ClusterGenerator
Region ..> Project
Project *-left- ModelHandler
Project *- ClusterGenerator
hide empty members
@enduml

@startuml
package pmsco {
class Project {
cluster_generator
export_cluster()
}
abstract class ClusterGenerator {
project
{abstract} count_emitters()
{abstract} create_cluster()
}
class LegacyClusterGenerator {
project
count_emitters()
create_cluster()
}
}
package "user project" {
class UserClusterGenerator {
project
count_emitters()
create_cluster()
}
note bottom : for complex cluster
class UserProject {
count_emitters()
create_cluster()
}
note bottom : for simple cluster
}
Project <|-- UserProject
ClusterGenerator <|-- LegacyClusterGenerator
ClusterGenerator <|-- UserClusterGenerator
Project *-- ClusterGenerator
UserProject .> LegacyClusterGenerator
UserProject .> UserClusterGenerator
@enduml

@startuml
class Project << (T,orchid) >> {
id
..
..
name
code
}
class Scan << (T,orchid) >> {
id
..
job_id
..
name
}
class Job << (T,orchid) >> {
id
..
project_id
..
name
mode
machine
git_hash
datetime
description
}
class Model << (T,orchid) >> {
id
..
job_id
..
model
gen
particle
}
class Result << (T,orchid) >> {
id
..
model_id
..
scan
sym
emit
region
rfac
}
class Param << (T,orchid) >> {
id
..
..
key
}
class ParamValue << (T,orchid) >> {
id
..
param_id
model_id
..
value
}
Project "1" *-- "*" Job
Job "1" *-- "*" Model
Job "1" *-- "*" Scan
Param "1" *-- "*" ParamValue
Model "1" *-- "*" ParamValue
Model "1" *-- "*" Result
hide empty members
@enduml

@startuml
start
repeat
:define model tasks;
:gather model results;
repeat while
stop
@enduml
@startuml
start
repeat
partition "generate tasks" {
:define model tasks;
:define scan tasks;
:define symmetry tasks;
:define emitter tasks;
:define region tasks;
}
fork
:calculate task 1;
fork again
:calculate task 2;
fork again
:calculate task N;
end fork
partition "collect results" {
:gather region results;
:gather emitter results;
:gather symmetry results;
:gather scan results;
:gather model results;
}
repeat while
stop
@enduml

@startuml{master-slave-messages.png}
== task execution ==
loop calculation tasks
hnote over Master : define task
Master -> Slave: TAG_NEW_TASK
activate Slave
hnote over Slave : calculation
alt successful
Slave --> Master: TAG_NEW_RESULT
else calculation failed
Slave --> Master: TAG_INVALID_RESULT
else critical error
Slave --> Master: TAG_ERROR_ABORTING
end
deactivate Slave
hnote over Master : collect results
end
...
== termination ==
Master -> Slave: TAG_FINISH
destroy Slave
@enduml

@startuml
package pmsco {
abstract class Project {
mode
code
scans
symmetries
{abstract} create_cluster()
{abstract} create_params()
{abstract} create_domain()
}
}
package projects {
class UserProject {
__init__()
create_cluster()
create_params()
create_domain()
}
}
Project <|-- UserProject
hide empty members
@enduml

@startuml
participant rank0 as "rank 0 (master)"
participant rank1 as "rank 1 (slave)"
participant rank2 as "rank 2 (slave)"
participant rankN as "rank N (slave)"
== initialization ==
rank0 ->> rank0
activate rank0
rnote over rank0: initialize project
== task loop ==
rnote over rank0: specify tasks
rank0 ->> rank1: task 1
activate rank1
rnote over rank1: execute task 1
rank0 ->> rank2: task 2
activate rank2
rnote over rank2: execute task 2
rank0 ->> rankN: task N
deactivate rank0
activate rankN
rnote over rankN: execute task N
rank0 <<-- rank1: result 1
deactivate rank1
rnote over rank0: process results\nspecify tasks
activate rank0
rank0 ->> rank1: task N+1
deactivate rank0
activate rank1
rnote over rank1: execute task N+1
rank0 <<-- rank2: result 2
deactivate rank2
activate rank0
rank0 ->> rank2: task N+2
deactivate rank0
activate rank2
rnote over rank2: execute task N+2
rank0 <<-- rankN: result N
deactivate rankN
activate rank0
rank0 ->> rankN: task 2N
deactivate rank0
activate rankN
rnote over rankN: execute task 2N
rank0 <<-- rank1: result N+1
deactivate rank1
rank0 <<-- rank2: result N+2
deactivate rank2
rank0 <<-- rankN: result 2N
deactivate rankN
rnote over rank0: process results
activate rank0
hnote over rank0: calculations complete
== termination ==
rnote over rank0: report results
rank0 ->> rank1: finish
destroy rank1
rank0 ->> rank2: finish
destroy rank2
rank0 ->> rankN: finish
destroy rankN
deactivate rank0
@enduml

@startuml
abstract class Project {
mode : str = "single"
code : str = "edac"
scans : Scan [1..*]
symmetries : dict [1..*]
cluster_generator : ClusterGenerator
handler_classes
files : FileTracker
{abstract} create_cluster() : Cluster
{abstract} create_params() : Params
{abstract} create_domain() : Domain
}
class Scan {
filename
raw_data
dtype
modulation
mode
emitter
initial_state
energies
thetas
phis
alphas
set_scan()
}
class Domain {
start : dict
min : dict
max : dict
step : dict
add_param(name, start, min, max, step)
get_param(name)
}
class Params {
title
comment
cluster_file
output_file
scan_file
initial_state
polarization
angular_resolution
z_surface
inner_potential
work_function
polar_incidence_angle
azimuthal_incidence_angle
experiment_temperature
}
Project "1" *-- "1..*" Scan
@enduml

@startuml
:model task|
fork
partition "scan 0" {
:define scan;
:scan 0 task|
detach
:scan 0 result|
}
fork again
partition "scan 1" {
:define scan;
:scan 1 task|
detach
:scan 1 result|
}
fork again
partition "scan N" {
:define scan;
:scan N task|
detach
:scan N result|
}
end fork
:model result|
@enduml

@startuml
|user|
start
:setup;
|pmsco|
:initialize;
:import experimental data;
repeat
:define task;
|calculator|
:calculate\ntask;
|pmsco|
:evaluate results;
repeat while
-> [finished];
:report results;
stop
@enduml
@startuml
|pmsco|
start
:define task (model, scan, symmetry, emitter, region);
|project|
:create cluster;
:create parameters;
|calculator|
:scattering calculation;
|pmsco|
:combine results;
|project|
:calculate modulation function;
:calculate R-factor;
stop
@enduml

@startuml
start
:initialize;
:import experimental data|
repeat
:define tasks;
fork
:calculate\ntask 1;
fork again
:calculate\ntask N;
end fork
:evaluate results;
repeat while
-> [finished];
:report results|
stop
@enduml

@startuml
skinparam componentStyle uml2
component "project" as project
component "PMSCO" as pmsco
component "scattering code\n(calculator)" as calculator
interface "command line" as cli
interface "input files" as input
interface "output files" as output
interface "experimental data" as data
interface "results" as results
data -> project
project ..> pmsco
pmsco ..> calculator
cli --> project
input -> calculator
calculator -> output
pmsco -> results
@enduml

@startuml
package pmsco {
abstract class Project {
mode
code
scans
symmetries
cluster_generator
handler_classes
__
{abstract} create_cluster()
{abstract} create_params()
{abstract} create_domain()
..
combine_scans()
combine_symmetries()
combine_emitters()
calc_modulation()
calc_rfactor()
}
abstract class ClusterGenerator {
{abstract} count_emitters()
{abstract} create_cluster()
}
}
package projects {
class UserProject {
scan_dict
__
setup()
..
create_params()
create_domain()
..
combine_symmetries()
}
class UserClusterGenerator {
count_emitters()
create_cluster()
}
}
Project <|-- UserProject
Project *-- ClusterGenerator
ClusterGenerator <|-- UserClusterGenerator
hide empty members
@enduml