add files for public distribution
based on internal repository 0a462b6 2017-11-22 14:41:39 +0100
3
docs/.gitignore
vendored
Normal file
@@ -0,0 +1,3 @@
doxygen*.db
html/*
latex/*
2396
docs/config.dox
Normal file
File diff suppressed because it is too large
26
docs/makefile
Normal file
@@ -0,0 +1,26 @@
SHELL=/bin/sh

# makefile for PMSCO documentation
#

.SUFFIXES:
.SUFFIXES: .c .cpp .cxx .exe .f .h .i .o .py .pyf .so .html
.PHONY: all docs clean

DOX=doxygen
DOXOPTS=
LATEX_DIR=latex

all: docs

docs: doxygen pdf

doxygen:
	$(DOX) $(DOXOPTS) config.dox

pdf: doxygen
	-$(MAKE) -C $(LATEX_DIR)

clean:
	-rm -rf latex/*
	-rm -rf html/*
7
docs/readme.txt
Normal file
@@ -0,0 +1,7 @@
to compile the source code documentation, you need the following packages (naming according to Debian):

doxygen
doxygen-gui (optional)
doxypy
graphviz
latex (optional)
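for example, on a Debian-based system the documentation can be built roughly as follows
(package names and paths may vary):

  sudo apt-get install doxygen doxypy graphviz
  cd docs
  make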
144
docs/src/commandline.dox
Normal file
@@ -0,0 +1,144 @@
|
||||
/*! @page pag_command Command Line
|
||||
\section sec_command Command Line
|
||||
|
||||
This section describes the command line arguments for a direct call of PMSCO from the shell.
|
||||
For batch job submission to Slurm see @ref sec_slurm.
|
||||
|
||||
Since PMSCO is started indirectly by a call of the specific project module,
|
||||
the syntax of the command line arguments is defined by the project module.
|
||||
However, to reduce the amount of custom code and documentation and to avoid confusion,
it is recommended to adhere to the standard syntax described below.
|
||||
|
||||
The basic command line is as follows:
|
||||
@code{.sh}
|
||||
[mpiexec -np NPROCESSES] python path-to-project.py [common args] [project args]
|
||||
@endcode
|
||||
|
||||
Include the first portion between square brackets if you want to run parallel processes.
|
||||
Specify the number of processes as the @c -np option.
|
||||
@c path-to-project.py should be the path and file name of your project module.
|
||||
Common args and project args are described below.
|
||||
|
||||
|
||||
\subsection sec_common_args Common Arguments
|
||||
|
||||
All common arguments are optional and default to more or less reasonable values if omitted.
|
||||
They can be added to the command line in arbitrary order.
|
||||
The following table is ordered by importance.
|
||||
|
||||
|
||||
| Option | Values | Description |
|
||||
| --- | --- | --- |
|
||||
| -h, --help | | Display a command line summary and exit. |
| -m, --mode | single (default), grid, swarm | Operation mode. |
|
||||
| -d, --data-dir | file system path | Directory path for experimental data files (if required by project). Default: current working directory. |
|
||||
| -o, --output-file | file system path | Base path and/or name for intermediate and output files. Default: pmsco_data |
|
||||
| -t, --time-limit | decimal number | Wall time limit in hours. The optimizers try to finish before the limit. Default: 24.0. |
|
||||
| -k, --keep-files | list of file categories | Output file categories to keep after the calculation. Multiple values can be specified and must be separated by spaces. By default, cluster and model (simulated data) of a limited number of best models are kept. See @ref sec_file_categories below. |
|
||||
| --log-level | DEBUG, INFO, WARNING (default), ERROR, CRITICAL | Minimum level of messages that should be added to the log. |
|
||||
| --log-file | file system path | Name of the main log file. Under MPI, the rank of the process is inserted before the extension. Default: output-file + log, or pmsco.log. |
|
||||
| --log-disable | | Disable logging. By default, logging is on. |
|
||||
| --pop-size | integer | Population size (number of particles) in swarm optimization mode. The default value is the greater of 4 or two times the number of calculation processes. |
|
||||
| -c, --code | edac (default) | Scattering code. At the moment, only edac is supported. |
|
||||
|
||||
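For illustration, a hypothetical invocation combining several of these options might look as follows (the project path and values are placeholders):

@code{.sh}
mpiexec -np 8 python projects/my_project/my_project.py \
    -m swarm -d ./data -o ./results/run01 -t 12 \
    -k cluster model --log-level INFO
@endcode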
|
||||
\subsubsection sec_file_categories File Categories
|
||||
|
||||
The following category names can be used with the @c --keep-files option.
|
||||
Multiple names can be specified and must be separated by spaces.
|
||||
|
||||
| Category | Description | Default Action |
|
||||
| --- | --- | --- |
|
||||
| input | raw input files for calculator, including cluster and phase files in custom format | delete |
|
||||
| output | raw output files from calculator | delete |
|
||||
| phase | phase files in portable format for report | delete |
|
||||
| cluster | cluster files in portable XYZ format for report | keep |
|
||||
| debug | debug files | delete |
|
||||
| model | output files in ETPAI format: complete simulation (a_-1_-1_-1_-1) | keep |
|
||||
| scan | output files in ETPAI format: scan (a_b_-1_-1_-1) | delete |
|
||||
| symmetry | output files in ETPAI format: symmetry (a_b_c_-1_-1) | delete |
|
||||
| emitter | output files in ETPAI format: emitter (a_b_c_d_-1) | delete |
|
||||
| region | output files in ETPAI format: region (a_b_c_d_e) | delete |
|
||||
| report | final report of results | keep |
|
||||
| population | final state of particle population | keep |
|
||||
| rfac | files related to models which give bad r-factors | delete |
|
||||
|
||||
|
||||
\subsection sec_project_args Project Arguments
|
||||
|
||||
The following table lists a few recommended options that are handled by the project code.
|
||||
Project options that are not listed here should use the long form to avoid conflicts in future versions.
|
||||
|
||||
|
||||
| Option | Values | Description |
|
||||
| --- | --- | --- |
|
||||
| -s, --scans | project-dependent | Nick names of scans to use in calculation. The nick name selects the experimental data file and the initial state of the photoelectron. Multiple values can be specified and must be separated by spaces. |
|
||||
|
||||
|
||||
\subsection sec_scanfile Experimental Scan Files
|
||||
|
||||
The recommended way of specifying experimental scan files is using nick names (dictionary keys) and the @c --scans option.
|
||||
A dictionary in the module code defines the corresponding file name, chemical species of the emitter and initial state of the photoelectron.
|
||||
The location of the files is selected using the common @c --data-dir option.
|
||||
This way, the file names and photoelectron parameters are versioned with the code,
|
||||
whereas command line arguments may easily get forgotten in the records.
|
||||
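A minimal sketch of such a dictionary (the keys are the nick names used with @c --scans; the field names and values here are made up for illustration):

@code{.py}
# hypothetical scan dictionary in the project module
scans = {
    'ea': {'filename': 'twoatom_energy_alpha.etpai', 'emitter': 'N', 'initial_state': '1s'},
    'tp': {'filename': 'twoatom_theta_phi.etpai', 'emitter': 'N', 'initial_state': '1s'},
}
@endcode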
|
||||
|
||||
\subsection sec_project_example Example Argument Handling
|
||||
|
||||
An example for handling the command line in a project module can be found in the twoatom.py demo project.
|
||||
The following code snippet shows how the common and project arguments are separated and handled.
|
||||
|
||||
@code{.py}
|
||||
def main():
|
||||
# have the pmsco module parse the common arguments.
|
||||
args, unknown_args = pmsco.pmsco.parse_cli()
|
||||
|
||||
# pass any arguments not handled by pmsco
|
||||
# to the project-defined parse_project_args function.
|
||||
# unknown_args can be passed to argparse.ArgumentParser.parse_args().
|
||||
if unknown_args:
|
||||
project_args = parse_project_args(unknown_args)
|
||||
else:
|
||||
project_args = None
|
||||
|
||||
# create the project object
|
||||
project = create_project()
|
||||
|
||||
# apply the common arguments on the project
|
||||
pmsco.pmsco.set_common_args(project, args)
|
||||
|
||||
# apply the specific arguments on the project
|
||||
set_project_args(project, project_args)
|
||||
|
||||
# run the project
|
||||
pmsco.pmsco.run_project(project)
|
||||
@endcode
|
||||
|
||||
|
||||
\section sec_slurm Slurm Job Submission
|
||||
|
||||
The command line of the Slurm job submission script for the Ra cluster at PSI is as follows.
|
||||
This script is specific to the configuration of the Ra cluster but may be adapted to other Slurm-based queues.
|
||||
|
||||
@code{.sh}
|
||||
qpmsco.sh [NOSUB] JOBNAME NODES TASKS_PER_NODE WALLTIME:HOURS PROJECT MODE [ARGS [ARGS [...]]]
|
||||
@endcode
|
||||
|
||||
Here, the first few arguments are positional and their order must be strictly adhered to.
|
||||
After the positional arguments, optional arguments of the PMSCO project command line can be added in arbitrary order.
|
||||
If you execute the script without arguments, it displays a short summary.
|
||||
The job script is written to @c ~/jobs/\$JOBNAME.
|
||||
|
||||
| Argument | Values | Description |
|
||||
| --- | --- | --- |
|
||||
| NOSUB (optional) | NOSUB or omitted | If NOSUB is present as the first argument, create the job script but do not submit it to the queue. Otherwise, submit the job script. |
|
||||
| JOBNAME | text | Name of job. Use only alphanumeric characters, no spaces. |
|
||||
| NODES | integer | Number of computing nodes (1 node = 24 or 32 processors). Do not specify more than 2. |
|
||||
| TASKS_PER_NODE | 1...24, or 32 | Number of processes per node. 24 or 32 for full-node allocation. 1...23 for shared node allocation. |
|
||||
| WALLTIME:HOURS | integer | Requested wall time. 1...24 for day partition, 24...192 for week partition, 1...192 for shared partition. This value is also passed on to PMSCO as the @c --time-limit argument. |
|
||||
| PROJECT | file system path | Python module (file path) that declares the project and starts the calculation. |
|
||||
| MODE | single, swarm, grid | PMSCO operation mode. This value is passed on to PMSCO as the @c --mode argument. |
|
||||
| ARGS (optional) | | Any further arguments are passed on verbatim to PMSCO. You don't need to specify the mode and time limit here. |
|
||||
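For example, a hypothetical submission of a 12-hour swarm job on one full node (24 tasks) might look like this:

@code{.sh}
qpmsco.sh my_job_0001 1 24 12 projects/my_project/project.py swarm -k cluster model
@endcode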
|
||||
*/
|
153
docs/src/concepts.dox
Normal file
@@ -0,0 +1,153 @@
|
||||
/*! @page pag_concepts Design Concepts
|
||||
\section sec_tasks Tasks
|
||||
|
||||
In an optimization project, a number of optimizable, high-level parameters generated by the optimization algorithm
|
||||
must be mapped to the input parameters and atomic coordinates before the calculation program is executed.
|
||||
Possibly, the calculation program is executed multiple times for inequivalent domains, emitters or scan geometries.
|
||||
After the calculation, the output is collected, compared to the experimental data, and the model is refined.
|
||||
In PMSCO, the optimization is broken down into a set of _tasks_ and assigned to a stack of task _handlers_ according to the following figure.
|
||||
Each invocation of the scattering program (EDAC) runs a specific task,
|
||||
i.e. a calculation for a set of specific parameters, a fully-qualified cluster of atoms, and a specific angle and/or energy scan.
|
||||
|
||||
\dotfile tasks.dot "PMSCO task stack"
|
||||
|
||||
At the root, the _model handler_ proposes models that need to be calculated according to the operation mode specified at the command line.
|
||||
A _model_ is the minimum set of variable parameters in the context of a custom project.
|
||||
Other parameters that will not vary under optimization are set directly by the project code.
|
||||
The model handler may generate models based on a fixed scheme, e.g. on a grid, or based on R-factors of previous results.
|
||||
|
||||
For each model, one task is passed to the task handling chain, starting with the scan handler.
|
||||
The _scan handler_ generates sub-tasks for each experimental scan dataset.
|
||||
This way, the model can be optimized for multiple experimental scans in the same run (see Sec. \ref sec_scanning).
|
||||
|
||||
The _symmetry handler_ generates sub-tasks based on the number of symmetries contained in the experimental data (see Sec. \ref sec_symmetry).
|
||||
For instance, for a system that includes two inequivalent structural domains, two separate calculations have to be run for each model.
|
||||
The symmetry handler is implemented on the project level and may be customized for a specific system.
|
||||
|
||||
The _emitter handler_ generates a sub-task for each inequivalent emitter atom
|
||||
so that the tasks can be distributed to multiple processes (see Sec. \ref sec_emitters).
|
||||
In a single-process environment, all emitters are calculated in one task.
|
||||
|
||||
The _region handler_ may split a scan region into several smaller chunks
|
||||
so that the tasks can be distributed to multiple processes.
|
||||
With EDAC, only energy scans can benefit from chunking
|
||||
since it always calculates the full angular distribution.
|
||||
This layer has to be enabled specifically in the project module.
|
||||
It is disabled by default.
|
||||
|
||||
At the end of the stack, the tasks are fully specified and are passed to the calculation queue.
|
||||
They are dispatched to the available processes of the MPI environment in which PMSCO was started,
|
||||
which allows calculations to be run in parallel.
|
||||
Only once the model has been broken down into multiple tasks
are the cluster and input files generated and the calculation program started.
|
||||
|
||||
At the end of a calculation, the output is passed back through the task handler stack.
|
||||
In this phase, each level gathers the datasets from the sub-tasks to the data requested by the parent task
|
||||
and passes the result to the next higher level.
|
||||
|
||||
On the top level, the calculation is compared to the experimental data.
|
||||
Depending on the operation mode, the model parameters are refined, and new tasks issued.
|
||||
If the optimization is finished according to a set of defined criteria, PMSCO exits.
|
||||
|
||||
As an implementation detail, each task is given a unique _identifier_ consisting of five integers
|
||||
which correspond to the five levels model, scan, symmetry, emitter and region.
|
||||
The identifier appears in the file names in the communication with the scattering program.
|
||||
Normally, the data files are deleted after the calculation, and only a few top-level files are kept
|
||||
(can be overridden at the command line or in the project code).
|
||||
At the top level, only the model ID is set; the others are undefined (-1).
|
||||
|
||||
|
||||
\section sec_symmetry Symmetry and Domain Averaging
|
||||
|
||||
A _symmetry_ under PMSCO is a discrete variant of a set of calculation parameters (including the atomic cluster)
|
||||
that is derived from the same set of model parameters
|
||||
and that contributes incoherently to the measured diffraction pattern.
|
||||
A symmetry may be represented by a special symmetry parameter which is not subject to optimization.
|
||||
|
||||
For instance, a real sample may have additional rotational domains that are not present in the cluster,
|
||||
increasing the symmetry from three-fold to six-fold.
|
||||
Or, an adsorbate may be present in a number of different lateral configurations on the substrate.
|
||||
In the first case, it may be sufficient to fold calculated data in the proper way to generate the same symmetry as in the measurement.
|
||||
In the latter case, it may be necessary to execute a scattering calculation for each possible orientation or a representative number of possible orientations.
|
||||
|
||||
PMSCO provides the basic framework to spawn multiple calculations according to the number of symmetries (cf. \ref sec_tasks).
|
||||
The actual data reduction from multiple symmetries to one measurement needs to be implemented on the project level.
|
||||
This section explains the necessary steps.
|
||||
|
||||
1. Your project needs to populate the pmsco.project.Project.symmetries list.
|
||||
For each symmetry, add a dictionary of symmetry parameters, e.g. <code>{'angle_azi': 15.0}</code>.
|
||||
There must be at least one symmetry in a project, otherwise no calculation is executed.
|
||||
|
||||
2. The project may apply the symmetry of a task to the cluster and parameter file if necessary.
|
||||
The pmsco.project.Project.create_cluster and pmsco.project.Project.create_params methods receive the index of the particular symmetry in addition to the model parameters.
|
||||
|
||||
3. The project combines the results of the calculations for the various symmetries into one dataset that can be compared to the measurement.
|
||||
The default method implemented in pmsco.project.Project just adds up all calculations with equal weight.
|
||||
If you need more control, you need to override the pmsco.project.Project.combine_symmetries method and implement your own algorithm.
|
||||
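As a minimal sketch, step 1 could look like this (the exact interface of pmsco.project.Project may differ between versions):

@code{.py}
import pmsco.project

class MyProject(pmsco.project.Project):
    def __init__(self):
        super(MyProject, self).__init__()
        # one dictionary of symmetry parameters per structural domain (step 1)
        self.symmetries = [{'angle_azi': 0.0}, {'angle_azi': 15.0}]
@endcode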
|
||||
|
||||
\section sec_scanning Scanning
|
||||
|
||||
PMSCO with EDAC currently supports the following scan axes.
|
||||
|
||||
- kinetic energy E
|
||||
- polar angle theta T
|
||||
- azimuthal angle phi P
|
||||
- analyser angle alpha A
|
||||
|
||||
The following combinations of these scan axes are allowed (see pmsco.data.SCANTYPES).
|
||||
|
||||
- E
|
||||
- E-T
|
||||
- E-A
|
||||
- T-P (hemispherical or hologram scan)
|
||||
|
||||
@attention The T and A axes cannot be combined.
|
||||
If a scan of one of them is specified, the other is assumed to be fixed at zero!
|
||||
This assumption may change in the future,
|
||||
so it is best to explicitly set the fixed angle to zero in the scan file.
|
||||
|
||||
@remark According to the measurement geometry at PEARL,
|
||||
alpha scans are implemented in EDAC as theta scans at phi = 90 in fixed cluster mode.
|
||||
The switch to fixed cluster mode is made by PMSCO internally,
|
||||
no change of angles or other parameters is necessary in the scan or project files
|
||||
besides filling the alpha instead of the theta column.
|
||||
|
||||
|
||||
\section sec_emitters Emitter Configurations
|
||||
|
||||
Since emitters contribute incoherently to the diffraction pattern,
|
||||
it should make no difference how the emitters are grouped and calculated.
|
||||
EDAC allows multiple emitters to be specified in one calculation.
However, running EDAC multiple times for single-emitter configurations and summing up the results
gives the same final diffraction pattern with no significant difference in CPU time.
It is thus easy to distribute the emitters over parallel processes in a multi-process environment.
PMSCO handles this transparently with minimal effort.
|
||||
|
||||
Within the same framework, PMSCO also supports clusters that are tailored to a specific emitter configuration.
|
||||
Suppose that the unit cell contains a large number of inequivalent emitters.
|
||||
If all emitters had to be included in a single calculation,
|
||||
the cluster would grow very large and the calculation would take a long time
|
||||
because it would include many long scattering paths
|
||||
that effectively do not contribute intensity to the final result.
|
||||
Using single emitters, a cluster can be built locally around each emitter and kept to a reasonable size.
|
||||
|
||||
Even when using this feature, PMSCO does not require that each configuration contains only one emitter.
|
||||
The term _emitter_ effectively means _emitter configuration_.
|
||||
A configuration can include multiple emitters which will not be broken up further.
|
||||
It is up to the project what is included in a particular configuration.
|
||||
|
||||
To enable emitter handling,
|
||||
|
||||
1. override the count_emitters method of your cluster generator
|
||||
and return the number of emitter configurations of a given model, scan and symmetry.
|
||||
|
||||
2. handle the emitter index in your create_cluster method.
|
||||
|
||||
3. (optionally) override the pmsco.project.Project.combine_emitters method
|
||||
if the emitters should not be added with equal weights.
|
||||
|
||||
For implementation details see the respective method descriptions.
|
||||
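A rough sketch of steps 1 and 2 (the class name and argument lists are assumed here and are not the exact PMSCO interface):

@code{.py}
# sketch only; consult the cluster generator documentation for the real signatures
class MyClusterGenerator(object):
    def count_emitters(self, model, index):
        # step 1: e.g. four inequivalent emitter configurations for this model
        return 4

    def create_cluster(self, model, index):
        # step 2: build a local cluster around the emitter configuration selected by the index
        pass
@endcode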
|
||||
*/
|
||||
|
84
docs/src/dataflow.dot
Normal file
@@ -0,0 +1,84 @@
|
||||
digraph G {
|
||||
compound = true;
|
||||
|
||||
/*
|
||||
subgraph cluster_project {
|
||||
label = "project";
|
||||
mode;
|
||||
domain;
|
||||
create_cluster;
|
||||
create_params;
|
||||
calc_modf;
|
||||
calc_rfac;
|
||||
comb_syms;
|
||||
comb_scans;
|
||||
}
|
||||
*/
|
||||
|
||||
subgraph cluster_model {
|
||||
label = "model handler";
|
||||
rank = same;
|
||||
model_creator [label="create model", group=creators];
|
||||
model_handler [label="evaluate results", group=handlers];
|
||||
|
||||
model_handler -> model_creator [constraint=false, label="optimize"];
|
||||
}
|
||||
|
||||
subgraph cluster_symmetry {
|
||||
label = "symmetry handler";
|
||||
rank = same;
|
||||
sym_creator [label="expand models", group=creators];
|
||||
sym_handler [label="combine symmetries", group=handlers];
|
||||
}
|
||||
|
||||
subgraph cluster_scan {
|
||||
label = "scan handler";
|
||||
rank = same;
|
||||
scan_creator [label="expand models", group=creators];
|
||||
scan_handler [label="combine scans", group=handlers];
|
||||
}
|
||||
|
||||
subgraph cluster_interface {
|
||||
label = "calculator interface"
|
||||
rank = same;
|
||||
calc_creator [label="generate input", group=creators];
|
||||
calc_handler [label="import output", group=handlers];
|
||||
}
|
||||
|
||||
calculator [label="calculator (EDAC)", shape=box];
|
||||
|
||||
model_creator -> sym_creator [label="model", style=bold];
|
||||
sym_creator -> scan_creator [label="models", style=bold];
|
||||
scan_creator -> calc_creator [label="models", style=bold];
|
||||
calc_creator -> calculator [label="clusters,\rparameters", style=bold];
|
||||
|
||||
calculator -> calc_handler [label="output files", style=bold];
|
||||
calc_handler -> scan_handler [label="raw data files", style=bold];
|
||||
scan_handler -> sym_handler [label="combined scans", style=bold];
|
||||
sym_handler -> model_handler [label="combined symmetries", style=bold];
|
||||
|
||||
mode [shape=parallelogram];
|
||||
mode -> model_creator [lhead="cluster_model"];
|
||||
|
||||
domain [shape=parallelogram];
|
||||
domain -> model_creator;
|
||||
//domain -> model_creator [lhead="cluster_model"];
|
||||
|
||||
create_cluster [shape=cds, label="cluster generator"];
|
||||
create_cluster -> calc_creator [style=dashed];
|
||||
|
||||
create_params [shape=cds, label="input file generator"];
|
||||
create_params -> calc_creator [style=dashed];
|
||||
|
||||
calc_modf [shape=cds, label="modulation function"];
|
||||
calc_modf -> model_handler [style=dashed];
|
||||
|
||||
calc_rfac [shape=cds, label="R-factor function"];
|
||||
calc_rfac -> model_handler [style=dashed];
|
||||
|
||||
comb_syms [shape=cds, label="symmetry combination rule"];
|
||||
comb_syms -> sym_handler [style=dashed];
|
||||
|
||||
comb_scans [shape=cds, label="scan combination rule"];
|
||||
comb_scans -> scan_handler [style=dashed];
|
||||
}
|
87
docs/src/execution.dox
Normal file
@@ -0,0 +1,87 @@
|
||||
/*! @page pag_run Running PMSCO
|
||||
\section sec_run Running PMSCO
|
||||
|
||||
To run PMSCO you need the PMSCO code and its dependencies (cf. @ref pag_install),
|
||||
a code module that contains the project-specific code,
|
||||
and one or several files containing the scan parameters and experimental data.
|
||||
Please check the <code>projects</code> folder for examples of project modules.
|
||||
For a detailed description of the command line, see @ref pag_command.
|
||||
|
||||
|
||||
\subsection sec_run_single Single Process
|
||||
|
||||
Run PMSCO from the command prompt:
|
||||
|
||||
@code{.sh}
|
||||
cd work-dir
|
||||
python project-dir/project.py [pmsco-arguments] [project-arguments]
|
||||
@endcode
|
||||
|
||||
where <code>work-dir</code> is the destination directory for output files,
|
||||
<code>project.py</code> is the specific project module,
|
||||
and <code>project-dir</code> is the directory where the project file is located.
|
||||
PMSCO is run in one process which handles all calculations sequentially.
|
||||
|
||||
The command line arguments are usually divided into common arguments interpreted by the main pmsco code (pmsco.py),
|
||||
and project-specific arguments interpreted by the project module.
|
||||
However, it is ultimately up to the project module how the command line is interpreted.
|
||||
|
||||
Example command line for a single EDAC calculation of the two-atom project:
|
||||
@code{.sh}
|
||||
cd work/twoatom
|
||||
python pmsco/projects/twoatom/twoatom.py -s ea -o twoatom-demo -m single
|
||||
@endcode
|
||||
|
||||
The project file <code>twoatom.py</code> takes the lead of the project execution.
|
||||
Usually, it contains only project-specific code and delegates common tasks to the main pmsco code.
|
||||
|
||||
In the command line above, the <code>-o twoatom-demo</code> and <code>-m single</code> arguments
|
||||
are interpreted by the pmsco module.
|
||||
<code>-o</code> sets the base name of output files,
|
||||
and <code>-m</code> selects the operation mode to a single calculation.
|
||||
|
||||
The scan argument is interpreted by the project module.
|
||||
It refers to a dictionary entry that declares the scan file, the emitting atomic species, and the initial state.
|
||||
In this example, the project looks for the <code>twoatom_energy_alpha.etpai</code> scan file in the project directory,
|
||||
and calculates the modulation function for a N 1s initial state.
|
||||
The kinetic energy and emission angles are contained in the scan file.
|
||||
|
||||
|
||||
\subsection sec_run_parallel Parallel Processes
|
||||
|
||||
PMSCO handles parallelization automatically and transparently.
|
||||
To start PMSCO in a parallel environment in the login shell,
|
||||
just prefix the command with <code>mpiexec -np N</code>,
|
||||
where N is the number of processes.
|
||||
One process assumes the role of the master, and the remaining processes act as slaves.
|
||||
The slave processes will run the scattering calculations, while the master coordinates the tasks,
|
||||
and optimizes the model parameters (depending on the operation mode).
|
||||
|
||||
For optimum performance, the number of processes should not exceed the number of available processors.
|
||||
To start a two-hour optimization job with multiple processes on a quad-core workstation with hyperthreading:
|
||||
@code{.sh}
|
||||
cd work/my_project
|
||||
mpiexec -np 8 python project-dir/project.py -o my_job_0001 -t 2 -m swarm
|
||||
@endcode
|
||||
|
||||
|
||||
\subsection sec_run_hpc High-Performance Cluster
|
||||
|
||||
The script @c bin/qpmsco.ra.sh takes care of submitting a PMSCO job to the Slurm queue of the Ra cluster at PSI.
The script can be adapted to other machines running the Slurm resource manager.
|
||||
The script generates a job script based on @c pmsco.ra.template,
|
||||
substituting the necessary environment and parameters,
|
||||
and submits it to the queue.
|
||||
|
||||
Execute @c bin/qpmsco.ra.sh without arguments to see a summary of the arguments.
|
||||
|
||||
To submit a job to the PSI clusters (see also the PEARL-Wiki page MscCalcRa),
|
||||
the command analogous to the one in the previous section would be:
|
||||
@code{.sh}
|
||||
bin/qpmsco.ra.sh my_job_0001 1 8 2 projects/my_project/project.py swarm
|
||||
@endcode
|
||||
|
||||
Be sure to consider the resource allocation policy of the cluster
|
||||
before you decide on the number of processes.
|
||||
Requesting fewer resources will prolong the run time but might increase the scheduling priority.
|
||||
*/
|
168
docs/src/installation.dox
Normal file
@@ -0,0 +1,168 @@
|
||||
/*! @page pag_install Installation
|
||||
\section sec_install Installation
|
||||
|
||||
\subsection sec_general General Remarks
|
||||
|
||||
The PMSCO code is maintained under git.
|
||||
The central repository for PSI-internal projects is at https://git.psi.ch/pearl/pmsco,
|
||||
the public repository at https://gitlab.psi.ch/pearl/pmsco.
|
||||
|
||||
For their own developments, users should clone the repository.
|
||||
Changes to common code should be submitted via pull requests.
|
||||
|
||||
|
||||
\subsection sec_requirements Requirements
|
||||
|
||||
The recommended IDE is [PyCharm (community edition)](https://www.jetbrains.com/pycharm).
|
||||
The documentation in [Doxygen](http://www.stack.nl/~dimitri/doxygen/index.html) format is part of the source code.
|
||||
The Doxygen compiler can generate separate documentation in HTML or LaTeX.
|
||||
|
||||
The MSC and EDAC codes compile with the GNU Fortran and C++ compilers on Linux.
|
||||
Other compilers may work but have not been tested.
|
||||
The code will run in any recent Linux environment on a workstation or in a virtual machine.
|
||||
Scientific Linux, CentOS7, [Ubuntu](https://www.ubuntu.com/)
|
||||
and [Lubuntu](http://lubuntu.net/) (recommended for virtual machine) have been tested.
|
||||
For optimization jobs, a high-performance cluster with 20-50 available processor cores is recommended.
|
||||
The code requires about 2 GB of RAM per process.
|
||||
|
||||
Please note that it may be important for the code to remain compatible with earlier compiler and library versions.
Newer compilers or the latest versions of the libraries may contain features that break this compatibility.
The code can be used with newer versions as long as they are backward compatible.
|
||||
The code depends on the following libraries:
|
||||
|
||||
- GCC 4.8
|
||||
- OpenMPI 1.10
|
||||
- F2PY
|
||||
- F2C
|
||||
- SWIG
|
||||
- Python 2.7 (incompatible with Python 3.0)
|
||||
- Numpy 1.11 (incompatible with Numpy 1.13 and later)
|
||||
- MPI4PY (from PyPI)
|
||||
- BLAS
|
||||
- LAPACK
|
||||
- periodictable
|
||||
|
||||
Most of these requirements are available from the Linux distribution or from PyPI (pip install).
|
||||
If there are any issues with the packages installed by the distribution, try the ones from PyPI
|
||||
(e.g. there is currently a bug in the Debian mpi4py package).
|
||||
The F2C source code is contained in the repository for machines which don't have it installed.
|
||||
On the PSI cluster machines, the environment must be set using the module system and conda (on Ra).
|
||||
Details are explained in the PEARL Wiki.
|
||||
|
||||
\subsubsection sec_install_ubuntu Installation on Ubuntu 16.04
|
||||
|
||||
The following instructions install the necessary dependencies on Ubuntu (or Lubuntu 16.04):
|
||||
|
||||
@code{.sh}
|
||||
sudo apt-get update
|
||||
|
||||
sudo apt-get install \
|
||||
binutils \
|
||||
build-essential \
|
||||
doxygen \
|
||||
doxypy \
|
||||
f2c \
|
||||
g++ \
|
||||
gcc \
|
||||
gfortran \
|
||||
git \
|
||||
graphviz \
|
||||
ipython \
|
||||
libopenmpi-dev \
|
||||
make \
|
||||
openmpi-bin \
|
||||
openmpi-common \
|
||||
python-all \
|
||||
python-mock \
|
||||
python-nose \
|
||||
python-numpy \
|
||||
python-pip \
|
||||
python-scipy \
|
||||
python2.7-dev \
|
||||
swig
|
||||
|
||||
sudo pip install --system mpi4py periodictable
|
||||
|
||||
cd /usr/lib
|
||||
sudo ln -s /usr/lib/libblas/libblas.so.3 libblas.so
|
||||
@endcode
|
||||
|
||||
The following instructions install the PyCharm IDE and a few other useful utilities:
|
||||
|
||||
@code{.sh}
|
||||
sudo sh -c 'echo "deb http://archive.getdeb.net/ubuntu xenial-getdeb apps" >> /etc/apt/sources.list.d/getdeb.list'
|
||||
wget -q -O - http://archive.getdeb.net/getdeb-archive.key | sudo apt-key add -
|
||||
sudo apt-get update
|
||||
sudo apt-get install \
|
||||
avogadro \
|
||||
gitg \
|
||||
meld \
|
||||
openjdk-9-jdk \
|
||||
pycharm
|
||||
@endcode
|
||||
|
||||
To produce documentation in PDF format (not recommended on virtual machine), install LaTeX:
|
||||
|
||||
@code{.sh}
|
||||
sudo apt-get install texlive-latex-recommended
|
||||
@endcode
|
||||
|
||||
|
||||
\subsection sec_compile Compilation
|
||||
|
||||
Make sure you have access to the PMSCO Git repository and set up your Git environment.
|
||||
Depending on your setup, location and permissions, one of the following addresses may work.
|
||||
Private key authentication is usually recommended except on shared computers.
|
||||
|
||||
| Repository | Access |
|
||||
| --- | --- |
|
||||
| `git@git.psi.ch:pearl/pmsco.git` | PSI intranet, SSH private key authentication |
|
||||
| `https://git.psi.ch/pearl/pmsco.git` | PSI intranet, password prompt |
|
||||
| `git@gitlab.psi.ch:pearl/pmsco.git` | Public repository, SSH private key authentication |
|
||||
| `https://gitlab.psi.ch/pearl/pmsco.git` | Public repository, password prompt |
|
||||
|
||||
Clone the code repository using one of these repository addresses and switch to the desired branch:
|
||||
|
||||
@code{.sh}
|
||||
cd ~
|
||||
git clone git@git.psi.ch:pearl/pmsco.git pmsco
|
||||
cd pmsco
|
||||
git checkout master
|
||||
git checkout -b my_branch
|
||||
@endcode
|
||||
|
||||
The compilation of the various modules is started by <code>make all</code>.
|
||||
The compilation step is necessary only once after installation.
|
||||
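For example, assuming the repository was cloned to ~/pmsco as shown above:

@code{.sh}
cd ~/pmsco
make all
@endcode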
|
||||
If the compilation of _loess.so fails due to a missing BLAS library,
try to set a link to the BLAS library as follows (the actual file names may vary depending on the distribution and version):
|
||||
@code{.sh}
|
||||
cd /usr/lib
|
||||
sudo ln -s /usr/lib/libblas/libblas.so.3 libblas.so
|
||||
@endcode
|
||||
|
||||
|
||||
\subsection sec_test Tests
|
||||
|
||||
Run the unit tests.
|
||||
They should pass successfully.
|
||||
Re-check from time to time.
|
||||
|
||||
@code{.sh}
|
||||
cd ~/pmsco
|
||||
nosetests
|
||||
@endcode
|
||||
|
||||
Run the twoatom project to check the compilation of the calculation programs.
|
||||
|
||||
@code{.sh}
|
||||
cd ~/pmsco
|
||||
mkdir work
|
||||
cd work
|
||||
mkdir twoatom
|
||||
cd twoatom/
|
||||
nice python ~/pmsco/projects/twoatom/twoatom.py -s ~/pmsco/projects/twoatom/twoatom_energy_alpha.etpai -o twoatom_energy_alpha -m single
|
||||
@endcode
|
||||
|
||||
To learn more about running PMSCO, see @ref pag_run.
|
||||
*/
|
61
docs/src/introduction.dox
Normal file
@@ -0,0 +1,61 @@
|
||||
/*! @mainpage Introduction
|
||||
\section sec_intro Introduction
|
||||
|
||||
PMSCO stands for PEARL multiple-scattering cluster calculations and structural optimization.
|
||||
It is a collection of computer programs to calculate photoelectron diffraction patterns,
|
||||
and to optimize structural models based on measured data.
|
||||
|
||||
The actual scattering calculation is done by code developed by other parties.
|
||||
While the scattering program typically calculates a diffraction pattern based on a set of static parameters and a specific coordinate file in a single process,
|
||||
PMSCO wraps around that program to facilitate parameter handling, cluster building, structural optimization and parallel processing.
|
||||
|
||||
In the current version, the [EDAC](http://garciadeabajos-group.icfo.es/widgets/edac/) code
|
||||
developed by F. J. García de Abajo, M. A. Van Hove, and C. S. Fadley (1999) is used for scattering calculations.
|
||||
Other code can be integrated as well.
|
||||
Initially, support for the MSC program by Kaduwela, Friedman, and Fadley was planned but is currently not maintained.
|
||||
PMSCO is written in Python 2.7.
|
||||
EDAC is written in C++, MSC in Fortran.
|
||||
PMSCO interacts with the calculation programs through Python wrappers for C++ or Fortran.
|
||||
|
||||
The MSC and EDAC source code is contained in the same software repository.
|
||||
The PMSCO, MSC, and EDAC programs may not be used outside the PEARL group without an explicit agreement by the respective original authors.
|
||||
Users of the PMSCO code are requested to coordinate and share the development of the code with the original author.
|
||||
Please read and respect the respective license agreements.
|
||||
|
||||
|
||||
\section sec_intro_highlights Highlights
|
||||
|
||||
- angle or energy scanned XPD.
|
||||
- various scanning modes including energy, polar angle, azimuthal angle, analyser angle.
|
||||
- averaging over multiple symmetries (domains or emitters).
|
||||
- global optimization of multiple scans.
|
||||
- structural optimization algorithms: particle swarm optimization, grid search, gradient search.
|
||||
- calculation of the modulation function.
|
||||
- calculation of the weighted R-factor.
|
||||
- automatic parallel processing using OpenMPI.
|
||||
|
||||
|
||||
\section sec_project Optimization Projects
|
||||
|
||||
To set up a new optimization project, you need to:
|
||||
|
||||
- create a new directory under projects.
|
||||
- create a new Python module in this directory, e.g., my_project.py.
|
||||
- implement a sub-class of project.Project in my_project.py.
|
||||
- override the create_cluster, create_params, and create_domain methods.
|
||||
- optionally, override the combine_symmetries and combine_scans methods.
|
||||
- add a global function create_project to my_project.py.
|
||||
- provide experimental data files (intensity or modulation function).
|
||||
|
||||
For details, see the documentation of the Project class,
|
||||
and the example projects.
|
||||
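A bare-bones skeleton of such a module might look like this (the method signatures are assumed for illustration; consult the Project class documentation for the exact interface):

@code{.py}
# projects/my_project/my_project.py -- skeleton only
import pmsco.project

class MyProject(pmsco.project.Project):
    def create_cluster(self, model, index):
        pass   # build and return the atomic cluster for this model

    def create_params(self, model, index):
        pass   # return the non-structural calculation parameters

    def create_domain(self):
        pass   # declare the model parameters and their ranges

def create_project():
    return MyProject()
@endcode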
|
||||
|
||||
\section sec_intro_start Getting Started
|
||||
|
||||
- @ref pag_concepts
|
||||
- @ref pag_install
|
||||
- @ref pag_run
|
||||
- @ref pag_command
|
||||
|
||||
*/
|
51
docs/src/modules.dot
Normal file
@@ -0,0 +1,51 @@
|
||||
digraph "modules" {
|
||||
node [fillcolor="transparent"];
|
||||
|
||||
main [label="__main__.py"];
|
||||
pmsco [label="pmsco.py"];
|
||||
project [label="project.py"];
|
||||
dispatch [label="dispatch.py"];
|
||||
handlers [label="handlers.py"];
|
||||
gradient [label="gradient.py"];
|
||||
grid [label="grid.py"];
|
||||
swarm [label="swarm.py"];
|
||||
cluster [label="cluster.py"];
|
||||
data [label="data.py"];
|
||||
|
||||
calc_interface [label="calc_interface.py"];
|
||||
edac_interface [label="edac_interface.py"];
|
||||
edac [label="_edac.so"];
|
||||
loess [label="_loess.so"];
|
||||
|
||||
custom [label="custom.py", fillcolor="red"];
|
||||
|
||||
main -> pmsco;
|
||||
|
||||
pmsco -> project;
|
||||
pmsco -> swarm;
|
||||
pmsco -> grid;
|
||||
pmsco -> gradient;
|
||||
pmsco -> dispatch;
|
||||
|
||||
project -> loess;
|
||||
project -> cluster;
|
||||
project -> data;
|
||||
|
||||
dispatch -> calc_interface;
|
||||
dispatch -> handlers;
|
||||
|
||||
handlers -> project;
|
||||
|
||||
gradient -> handlers;
|
||||
grid -> handlers;
|
||||
swarm -> handlers;
|
||||
|
||||
calc_interface -> edac_interface;
|
||||
edac_interface -> data;
|
||||
edac_interface -> cluster;
|
||||
edac_interface -> edac;
|
||||
|
||||
custom -> project;
|
||||
custom -> cluster;
|
||||
custom -> data;
|
||||
}
|
27
docs/src/processes.dot
Normal file
@@ -0,0 +1,27 @@
digraph "processes" {

    optimizer;
    symmetrizer;
    parallelizer;
    comparator;
    cluster_gen [label="cluster generator"];

    {
        rank="same";
        edac1 [label="EDAC 1"];
        edac2 [label="EDAC 2"];
        edacN [label="EDAC N"];
        edac2 -> edacN [style="dotted", dir="none"];
    }

    optimizer -> symmetrizer;
    symmetrizer -> scanner [label="N"];
    scanner -> parallelizer [label="N x M"];
    parallelizer -> cluster_gen;
    parallelizer -> edac1;
    parallelizer -> edac2;
    parallelizer -> edacN;

    optimizer -> comparator;

}
|
95
docs/src/tasks.dot
Normal file
@@ -0,0 +1,95 @@
|
||||
digraph "tasks" {
|
||||
nodesep=0.3;
|
||||
node [fillcolor="transparent", width=1.0, height=0.7];
|
||||
//node [fillcolor="transparent", height=0.7];
|
||||
newrank=true;
|
||||
compound=true;
|
||||
splines=false;
|
||||
|
||||
//{rank=same;
|
||||
initial [shape=note, label="initial\nparameters"];
|
||||
result [shape=note, label="optimized\nparameters"];
|
||||
data [shape=note, label="experimental\ndata"];
|
||||
//}
|
||||
|
||||
subgraph cluster_model {
|
||||
shape=rect;
|
||||
rank=same;
|
||||
label="model handler";
|
||||
create_model [label="generate\nmodel parameters"];
|
||||
evaluate_model [label="evaluate\nmodel"];
|
||||
}
|
||||
custom_modf [label="modulation\nfunction", shape=cds];
|
||||
{rank=same; create_model; evaluate_model; custom_modf;}
|
||||
custom_modf -> evaluate_model [lhead=cluster_model];
|
||||
initial -> create_model;
|
||||
data -> evaluate_model;
|
||||
result -> evaluate_model [dir=back];
|
||||
create_model -> result [dir=back];
|
||||
|
||||
|
||||
subgraph cluster_scan {
|
||||
label="scan handler";
|
||||
rank=same;
|
||||
create_scan [label="define\nscan\ntasks"];
|
||||
combine_scan [label="gather\nscan\nresults"];
|
||||
}
|
||||
custom_scan [label="scan\nconfiguration", shape=note];
|
||||
{rank=same; custom_scan; create_scan; combine_scan;}
|
||||
custom_scan -> create_scan [lhead=cluster_scan];
|
||||
|
||||
subgraph cluster_symmetry {
|
||||
label="symmetry handler";
|
||||
rank=same;
|
||||
create_symmetry [label="define\nsymmetry\ntasks"];
|
||||
combine_symmetry [label="gather\nsymmetry\nresults"];
|
||||
}
|
||||
custom_symmetry [label="symmetry\ndefinition", shape=cds];
|
||||
{rank=same; create_symmetry; combine_symmetry; custom_symmetry;}
|
||||
custom_symmetry -> combine_symmetry [lhead=cluster_symmetry];
|
||||
|
||||
subgraph cluster_emitter {
|
||||
label="emitter handler";
|
||||
rank=same;
|
||||
create_emitter [label="define\nemitter\ntasks"];
|
||||
combine_emitter [label="gather\nemitter\nresults"];
|
||||
}
|
||||
custom_emitter [label="emitter\nconfiguration", shape=cds];
|
||||
{rank=same; custom_emitter; create_emitter; combine_emitter;}
|
||||
custom_emitter -> combine_emitter [lhead=cluster_emitter];
|
||||
|
||||
subgraph cluster_region {
|
||||
label="region handler";
|
||||
rank=same;
|
||||
create_region [label="define\nregion\ntasks"];
|
||||
combine_region [label="gather\nregion\nresults"];
|
||||
}
|
||||
custom_region [label="scan\nconfiguration", shape=note];
|
||||
{rank=same; custom_region; create_region; combine_region;}
|
||||
custom_region -> create_region [lhead=cluster_region];
|
||||
|
||||
|
||||
subgraph cluster_edac {
|
||||
label="parallel computing";
|
||||
edac [label=EDAC, peripheries=5];
|
||||
}
|
||||
create_cluster [label="cluster\ngenerator", shape=cds];
|
||||
{rank=same; create_cluster; edac;}
|
||||
create_cluster -> edac;
|
||||
|
||||
create_model -> create_scan [label="level 1 tasks"];
|
||||
evaluate_model -> combine_scan [label="level 1 results", dir=back];
|
||||
|
||||
create_scan -> create_symmetry [label="level 2 tasks"];
|
||||
combine_scan -> combine_symmetry [label="level 2 results", dir=back];
|
||||
|
||||
create_symmetry -> create_emitter [label="level 3 tasks"];
|
||||
combine_symmetry -> combine_emitter [label="level 3 results", dir=back];
|
||||
|
||||
create_emitter -> create_region [label="level 4 tasks"];
|
||||
combine_emitter -> combine_region [label="level 4 results", dir=back];
|
||||
|
||||
create_region -> edac [label="level 5 tasks"];
|
||||
combine_region -> edac [label="level 5 results", dir=back];
|
||||
|
||||
}
|
10
docs/src/tasks_legend.dot
Normal file
@@ -0,0 +1,10 @@
digraph "tasks" {
    node [fillcolor="transparent", width=1.0, height=0.7];

    data [shape=note, label="input\noutput"];
    task [label="process\nunit", shape=box];
    custom [label="user\ncode", shape="cds"];
    process [label="process"];

    task -> process -> custom -> data [style=invis];
}