4 Commits

211 changed files with 44602 additions and 6738 deletions


@@ -0,0 +1,51 @@
name: build and test the package
on:
push:
branches:
- master
pull_request:
branches:
- master
jobs:
build:
runs-on: ubuntu-latest
steps:
- name: check runner environment
run: |
uname -a
lsb_release -a
echo "Runner home: $HOME"
- name: check out
uses: actions/checkout@v5
- name: set up compilers
run: |
sudo apt-get update
sudo apt-get -y install binutils build-essential g++ gcc gfortran libblas-dev liblapack-dev openmpi-bin openmpi-common sqlite3
- name: set up python
uses: actions/setup-python@v6
with:
python-version: '3.12'
- name: install uv
uses: astral-sh/setup-uv@v7
with:
version: "0.9.18"
enable-cache: true
- name: lint with ruff
# configuration is in pyproject.toml
run: |
uvx ruff check --extend-exclude=.venv,build pmsco
- name: install dependencies
run: uv sync --locked --all-extras --dev
- name: tests
run: |
uv run nosetests
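The same checks can be reproduced locally before pushing. A minimal sketch, assuming uv/uvx are installed and the commands are run from the repository root:

# lint the pmsco package with the same exclusions as the CI job
uvx ruff check --extend-exclude=.venv,build pmsco
# install the locked dependencies including extras and dev tools
uv sync --locked --all-extras --dev
# run the test suite
uv run nosetests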


@@ -0,0 +1,45 @@
name: build and deploy documentation
on:
push:
branches:
- master
jobs:
build-and-deploy:
runs-on: ubuntu-latest
container:
image: gitea.psi.ch/pearl/docs
credentials:
username: ${{ gitea.actor }}
password: ${{ secrets.package_token }}
steps:
- name: checkout
working-directory: /app
run: |
git clone --branch master --single-branch https://${{ secrets.REPO_TOKEN }}@gitea.psi.ch/${{ github.repository }}.git
- name: build
working-directory: /app/pmsco/docs
run: |
export REVISION=$(git describe --always --tags --dirty --long || echo "unknown, $(date +'%F %T %z')")
export OUTDIR=/app/build
doxygen config.dox
- name: configure git
working-directory: /app/pmsco
run: |
git config --global user.name "Gitea Actions"
git config --global user.email "actions@gitea.local"
- name: push to gitea-pages
working-directory: /app/pmsco
run: |
git checkout --orphan gitea-pages
git reset --hard
cp -r /app/build/html/* .
git add .
git commit -m "Deploy documentation to gitea"
git push -f https://${{ secrets.REPO_TOKEN }}@gitea.psi.ch/${{ github.repository }}.git gitea-pages
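For a local preview of the documentation, the same build can be sketched outside the container, assuming doxygen and the doxypypy filter are installed; the output directory below is an arbitrary choice:

cd docs
export REVISION=$(git describe --always --tags --dirty --long || echo "unknown, $(date +'%F %T %z')")
export OUTDIR=../build/docs   # any writable directory
doxygen config.dox
# the generated HTML ends up under $OUTDIR/html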

.githooks/install-hooks.sh Executable file

@@ -0,0 +1,12 @@
#!/usr/bin/env bash
echo "Setting up Git hooks..."
cd "$(dirname "$0")"
cd ..
# Create symlinks
ln -sf ../../.githooks/pre-commit .git/hooks/pre-commit
ln -sf ../../.githooks/pre-push .git/hooks/pre-push
chmod +x .git/hooks/*
echo "Git hooks installed successfully!"

.githooks/pre-commit Executable file

@@ -0,0 +1,34 @@
#!/bin/bash
# .git/hooks/pre-commit
# requires uv
# Track overall status
PASS=true
# Color output
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
NC='\033[0m' # No Color
echo -e "${YELLOW}Running pre-commit checks...${NC}"
PY_FILES=$(git diff --cached --name-only --diff-filter=ACM | grep '\.py$')
# Python checks
if [ -n "$PY_FILES" ]; then
echo -e "${YELLOW}Checking Python files...${NC}"
if ! uvx ruff check --extend-exclude=.*,build*; then
PASS=false
fi
fi
# Final status
if [ "$PASS" = true ]; then
echo -e "${GREEN}All checks passed!${NC}"
exit 0
else
echo -e "${RED}Some checks failed. Please fix issues before committing.${NC}"
exit 1
fi
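The hook can also be exercised by hand to see what the next commit would trigger, assuming uv is installed; the file name below is only an example:

git add pmsco/some_module.py   # hypothetical staged change
./.githooks/pre-commit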

.githooks/pre-push Executable file

@@ -0,0 +1,43 @@
#!/bin/bash
# .git/hooks/pre-push
# Track overall status
PASS=true
# Color output
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
NC='\033[0m' # No Color
echo -e "${YELLOW}Running pre-push checks...${NC}"
# list staged Python files for information; the lint and tests below cover the whole project
PY_FILES=$(git diff --cached --name-only --diff-filter=ACM | grep '\.py$')
# Python checks
echo -e "${YELLOW}Checking Python files...${NC}"
# Lint
if ! uvx ruff check --extend-exclude=.*,build*; then
PASS=false
fi
# Sync the project environment before running the tests
uv sync
# Run different test suites based on changed files
echo -e "${YELLOW}Running Python tests...${NC}"
if ! uv run nosetests; then
echo -e "Tests failed. Push aborted."
PASS=false
fi
# Final status
if [ "$PASS" = true ]; then
echo -e "${GREEN}All checks passed!${NC}"
exit 0
else
echo -e "${RED}Some checks failed. Please fix issues before committing.${NC}"
exit 1
fi
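Both hooks can be bypassed for an urgent commit or push with git's standard flag; the CI workflow still runs the same checks on the server side:

git commit --no-verify -m "work in progress"
git push --no-verify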

.gitignore vendored

@@ -2,6 +2,8 @@ work/*
debug/*
lib/*
dev/*
build/*
__pycache__/*
*.pyc
*.o
*.so
@@ -15,3 +17,4 @@ dev/*
.ropeproject/*
.fuse*
.trash
.wraplock

CHANGES.md Normal file

@@ -0,0 +1,72 @@
Release 4.2.0 (2026-01-01)
==========================
- Switch to Astral-UV package manager
- CI lint, build, test workflow in gitea
- Automated documentation workflow in gitea
- Recommended Python version 3.12 (compatibility 3.10-3.13)
- Multipole expansion
- Table optimization mode
- Integrate phagen scattering amplitude calculator
- Select modulation and R-factor functions in runfile
- Parametric holo scan generator
- Namespace package installation, support for editable installation
- Simplified command line
- Meson build system
- Differential cross section in periodic table
- Configurable reports
- Path resolution
- Database interface for reports
- Runfile based job scheduling
Release 3.0.0 (2021-02-01)
==========================
- Compatibility with recent conda and singularity versions
- Installation: include plantuml.jar
- Documentation: replace doxypy by doxypypy
- Redefine output_file property
- Documentation of run file interface
- Introduce runfile interface
- Set legacy Fortran for compatibility with recent compiler
- Graphics: fixed color range for modulation functions
- Cluster: build_element accepts symbol or number
- Graphics: swarm plot
- Graphics: genetic chart
- Periodic table: use common binding energies in condensed matter XPS
- Periodic table: reformat bindingenergy.json, add more import/export functions
- Spectrum: add plot cross section function
Release 2.2.0 (2020-09-04)
==========================
- Demo project for arbitrary molecule (cluster file)
- Bugfix: DATA CORRUPTION in phagen translator (emitter mix-up)
- Bugfix: load native cluster file: wrong column order
- Bugfix: initial-state command line option not respected
- Photoionization cross sections and spectrum simulator
- Database: use local lock objects
- Database: create view on results and models
- REFACTORING: Domain -> ModelSpace, Params -> CalculatorParams
- REFACTORING: symmetry -> domain
- Possible conda/mpi4py conflict - changed installation procedure
- Cluster: new calc_scattering_angles function
- Include a periodic table of binding energies of the elements
- Clean up files in the main loop and in the end
- Bugfix: database ingestion overwrites results from previous jobs
- Time out at least 10 minutes before the hard time limit given on the command line
- Cluster: new get_center function
- Bugfix: type error in grid optimizer
- Bugfix: file extension in phagen/makefile
- Dispatch: new algo to distribute processing slots to task levels
- Bugfix: load single-line data files correctly!
- Cluster generator for zincblende crystal
- Phagen translator: fix phase convention (MAJOR), fix single-energy
- Dispatch: give more priority to mid-level tasks in single mode
- Improve performance of cluster add_bulk, add_layer and rotate
- Unit test for Cluster.translate, bugfix in translate and relax
- Fix compatibility with numpy >= 1.14, require numpy >= 1.13
- Database: introduce job-tags
- qpmsco: delete code after execution

LICENSE Normal file

@@ -0,0 +1,201 @@
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "{}"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright 2015-2020 Paul Scherrer Institut
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.


@@ -5,10 +5,10 @@ List of Contributors
Original Author
---------------
Matthias Muntwiler, <mailto:matthias.muntwiler@psi.ch>
- Matthias Muntwiler, <mailto:matthias.muntwiler@psi.ch>
Contributors
------------
- Frederik Schirdewahn, <mailto:frederik.schirdewahn@psi.ch>

README.md

@@ -1,70 +1,129 @@
Introduction
============
PMSCO stands for PEARL multiple-scattering cluster calculations and structural optimization.
It is a collection of computer programs to calculate photoelectron diffraction patterns,
and to optimize structural models based on measured data.
PMSCO (PSI multiple-scattering cluster calculations and structural optimization)
is a Python-based workflow engine to calculate photoelectron diffraction patterns,
and to optimize structural models based on measured data using machine learning techniques.
PMSCO was developed at the [Paul Scherrer Institut (PSI)](https://www.psi.ch/)
by the team of the [PEARL beamline](https://www.psi.ch/en/sls/pearl).
The actual scattering calculation is done by code developed by other parties.
PMSCO wraps around that program and facilitates parameter handling, cluster building, structural optimization and parallel processing.
PMSCO wraps around those programs and facilitates parameter handling, cluster building, structural optimization and parallel processing.
In the current version, the [EDAC](http://garciadeabajos-group.icfo.es/widgets/edac/) code
developed by F. J. García de Abajo, M. A. Van Hove, and C. S. Fadley (1999) is used for scattering calculations.
Other code can be integrated as well.
Instead of EDAC built-in routines, alternatively,
the PHAGEN program from [MsSpec-1.0](https://msspec.cnrs.fr/index.html) can be used to calculate atomic scattering factors.
Highlights
----------
- angle or energy scanned XPD.
- various scanning modes including energy, polar angle, azimuthal angle, analyser angle.
- averaging over multiple symmetries (domains or emitters).
- angle and energy scanned XPD.
- various scanning modes including energy, manipulator angle (polar/azimuthal), emission angle.
- averaging over multiple domains and emitters.
- global optimization of multiple scans.
- structural optimization algorithms: particle swarm optimization, grid search, gradient search.
- structural optimization algorithms: particle swarm optimization, genetic algorithm, grid scan, table scan.
- detailed reports and graphs of result files.
- calculation of the modulation function.
- calculation of the weighted R-factor.
- automatic parallel processing using OpenMPI.
- compatible with Slurm resource manager on Linux cluster machines.
Installation
============
PMSCO is written in Python 3.6 and compatible with Python 2.7.
The code will run in any recent Linux environment on a workstation or in a virtual machine.
Scientific Linux, CentOS7, [Ubuntu](https://www.ubuntu.com/)
and [Lubuntu](http://lubuntu.net/) (recommended for virtual machine) have been tested.
For optimization jobs, a cluster with 20-50 available processor cores is recommended.
PMSCO is written in Python. The recommended Python version is 3.12.
Further requirements are the GNU compiler collection, BLAS/LAPACK libraries, OpenMPI and a package manager such as uv, pip or conda.
For optimization jobs, a cluster machine with 20-50 available processor cores is recommended.
Smaller jobs run on any recent Linux workstation.
The code requires about 2 GB of RAM per process.
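As a minimal installation sketch on a Debian/Ubuntu system, mirroring the packages installed by the CI workflow (the package list and the choice of uv are assumptions; see the documentation for the authoritative instructions):

sudo apt-get update
sudo apt-get -y install binutils build-essential g++ gcc gfortran libblas-dev liblapack-dev openmpi-bin openmpi-common sqlite3
# inside a clone of the repository
uv sync --locked --all-extras --dev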
Detailed installation instructions and dependencies can be found in the documentation
(docs/src/installation.dox).
A [Doxygen](http://www.stack.nl/~dimitri/doxygen/index.html) compiler with Doxypy is required to generate the documentation in HTML or LaTeX format.
The public distribution of PMSCO does not contain the [EDAC](http://garciadeabajos-group.icfo.es/widgets/edac/) code.
Please obtain the EDAC source code from the original author, copy it to the pmsco/edac directory, and apply the edac_all.patch patch.
License
=======
The source code of PMSCO is licensed under the [Apache License, Version 2.0](http://www.apache.org/licenses/LICENSE-2.0).
Please read and respect the license agreement.
This _does not include_ the calculation packages contained in the subprojects folder which are licensed separately.
Please share your extensions of the code with the original author.
The gitlab facility can be used to create forks and to submit pull requests.
Attribution notices for your contributions shall be added to the NOTICE.md file.
- Please read and respect the respective license agreements.
- Please acknowledge the use of the code.
- Please consider sharing your developments with the original author.
Due to different copyright terms, the third-party calculation programs are not contained in the public software repository.
These programs may not be used without an explicit agreement by the respective original authors.
Author
------
Authors
-------
Matthias Muntwiler, <mailto:matthias.muntwiler@psi.ch>
- Matthias Muntwiler, <mailto:matthias.muntwiler@psi.ch>
- Frederik Schirdewahn, <mailto:frederik.schirdewahn@psi.ch>
Copyright
---------
Copyright 2015-2018 by [Paul Scherrer Institut](http://www.psi.ch)
Copyright 2015-2025 by [Paul Scherrer Institut](http://www.psi.ch)
Release Notes
=============
For a detailed list of changes, see the CHANGES.md file.
4.2.0 (2026-01-01)
------------------
- Recommended Python version 3.12 (compatibility 3.10-3.13)
- Build system and package environment
- Switch to Astral-UV package manager
- Meson build system for Fortran, C and C++ extension modules
- Namespace package installation, support for editable installation
- CI lint, build, test workflow in gitea
- Automated documentation workflow in gitea
- User interface
- Simplified command line, all configuration via runfile and/or project class
- Select modulation and R-factor functions in runfile
- Parametric holo scan generator
- Configurable reports
- Path resolution in runfile
- Database interface for reports
- Runfile based job scheduling
- Calculation features
- Multipole expansion
- Table optimization mode
- Integrate phagen scattering amplitude calculator
- Differential cross section in periodic table
3.0.0 (2021-02-08)
------------------
- Run file interface replaces command line arguments:
- Specify all run-time parameters in a JSON-formatted text file.
- Override any public attribute of the project class.
- Only the name of the run file is needed on the command line.
- The command line interface is still available, some default values and the handling of directory paths have changed.
Check your code for compatibility.
- Integrated job scheduling with the Slurm resource manager:
- Declare all job arguments in the run file and have PMSCO submit the job.
- Graphics scripts for genetic chart and swarm population (experimental feature).
- Update for compatibility with recent Ubuntu (20.04), Anaconda (4.8) and Singularity (3.7).
- Drop compatibility with Python 2.7, minimum requirement is Python 3.6.
2.2.0 (2020-09-04)
------------------
This release breaks existing project code unless the listed refactorings are applied.
- Major refactoring: The 'symmetry' calculation level is renamed to 'domain'.
The previous Domain class is renamed to ModelSpace, Params to CalculatorParams.
The refactorings must be applied to project code as well.
- Included periodic table of elements with electron binding energies and scattering cross-sections.
- Various bug fixes in cluster routines, data file handling, and in the PHAGEN interface.
- Experimental sqlite3 database interface for optimization results.


@@ -1,136 +0,0 @@
#!/bin/bash
#
# Slurm script template for PMSCO calculations on the Ra cluster
# based on run_mpi_HPL_nodes-2.sl by V. Markushin 2016-03-01
#
# this version checks out the source code from a git repository
# to a temporary location and compiles the code.
# this is to minimize conflicts between different jobs
# but requires that each job has its own git commit.
#
# Use:
# - enter the appropriate parameters and save as a new file.
# - call the sbatch command to pass the job script.
# request a specific number of nodes and tasks.
# example:
# sbatch --nodes=2 --ntasks-per-node=24 --time=02:00:00 run_pmsco.sl
# the qpmsco script does all this for you.
#
# PMSCO arguments
# copy this template to a new file, and set the arguments
#
# PMSCO_WORK_DIR
# path to be used as working directory.
# contains the script derived from this template
# and a copy of the pmsco code in the 'pmsco' directory.
# receives output and temporary files.
#
# PMSCO_PROJECT_FILE
# python module that declares the project and starts the calculation.
# must include the file path relative to $PMSCO_WORK_DIR.
#
# PMSCO_OUT
# name of output file. should not include a path.
#
# all paths are relative to $PMSCO_WORK_DIR or (better) absolute.
#
#
# Further arguments
#
# PMSCO_JOBNAME (required)
# the job name is the base name for output files.
#
# PMSCO_WALLTIME_HR (integer, required)
# wall time limit in hours. must be integer, minimum 1.
# this value is passed to PMSCO.
# it should specify the same amount of wall time as requested from the scheduler.
#
# PMSCO_PROJECT_ARGS (optional)
# extra arguments that are parsed by the project module.
#
#SBATCH --job-name="_PMSCO_JOBNAME"
#SBATCH --output="_PMSCO_JOBNAME.o.%j"
#SBATCH --error="_PMSCO_JOBNAME.e.%j"
PMSCO_WORK_DIR="_PMSCO_WORK_DIR"
PMSCO_JOBNAME="_PMSCO_JOBNAME"
PMSCO_WALLTIME_HR=_PMSCO_WALLTIME_HR
PMSCO_PROJECT_FILE="_PMSCO_PROJECT_FILE"
PMSCO_OUT="_PMSCO_JOBNAME"
PMSCO_PROJECT_ARGS="_PMSCO_PROJECT_ARGS"
module load psi-python36/4.4.0
module load gcc/4.8.5
module load openmpi/3.1.3
source activate pmsco3
echo '================================================================================'
echo "=== Running $0 at the following time and place:"
date
/bin/hostname
cd $PMSCO_WORK_DIR
pwd
ls -lA
#the intel compiler is currently not compatible with mpi4py. -mm 170131
#echo
#echo '================================================================================'
#echo "=== Setting the environment to use Intel Cluster Studio XE 2016 Update 2 intel/16.2:"
#cmd="source /opt/psi/Programming/intel/16.2/bin/compilervars.sh intel64"
#echo $cmd
#$cmd
echo
echo '================================================================================'
echo "=== The environment is set as following:"
env
echo
echo '================================================================================'
echo "BEGIN test"
which mpirun
cmd="mpirun /bin/hostname"
echo $cmd
$cmd
echo "END test"
echo
echo '================================================================================'
echo "BEGIN mpirun pmsco"
echo
cd "$PMSCO_WORK_DIR"
cd pmsco
echo "code revision"
git log --pretty=tformat:'%h %ai %d' -1
make -C pmsco all
python -m compileall pmsco
python -m compileall projects
echo
cd "$PMSCO_WORK_DIR"
PMSCO_CMD="python pmsco/pmsco $PMSCO_PROJECT_FILE"
PMSCO_ARGS="$PMSCO_PROJECT_ARGS"
if [ -n "$PMSCO_SCAN_FILES" ]; then
PMSCO_ARGS="-s $PMSCO_SCAN_FILES $PMSCO_ARGS"
fi
if [ -n "$PMSCO_OUT" ]; then
PMSCO_ARGS="-o $PMSCO_OUT $PMSCO_ARGS"
fi
if [ "$PMSCO_WALLTIME_HR" -ge 1 ]; then
PMSCO_ARGS="-t $PMSCO_WALLTIME_HR $PMSCO_ARGS"
fi
if [ -n "$PMSCO_LOGLEVEL" ]; then
PMSCO_ARGS="--log-level $PMSCO_LOGLEVEL --log-file $PMSCO_JOBNAME.log $PMSCO_ARGS"
fi
# Do no use the OpenMPI specific options, like "-x LD_LIBRARY_PATH", with the Intel mpirun.
cmd="mpirun $PMSCO_CMD $PMSCO_ARGS"
echo $cmd
$cmd
echo "END mpirun pmsco"
echo '================================================================================'
cd "$PMSCO_WORK_DIR"
rm -rf pmsco
date
ls -lAtr
echo '================================================================================'
exit 0


@@ -1,157 +0,0 @@
#!/bin/bash
#
# Slurm script template for PMSCO calculations on the Ra cluster
# based on run_mpi_HPL_nodes-2.sl by V. Markushin 2016-03-01
#
# Use:
# - enter the appropriate parameters and save as a new file.
# - call the sbatch command to pass the job script.
# request a specific number of nodes and tasks.
# example:
# sbatch --nodes=2 --ntasks-per-node=24 --time=02:00:00 run_pmsco.sl
#
# PMSCO arguments
# copy this template to a new file, and set the arguments
#
# PMSCO_WORK_DIR
# path to be used as working directory.
# contains the script derived from this template.
# receives output and temporary files.
#
# PMSCO_PROJECT_FILE
# python module that declares the project and starts the calculation.
# must include the file path relative to $PMSCO_WORK_DIR.
#
# PMSCO_SOURCE_DIR
# path to the pmsco source directory
# (the directory which contains the bin, lib, pmsco sub-directories)
#
# PMSCO_SCAN_FILES
# list of scan files.
#
# PMSCO_OUT
# name of output file. should not include a path.
#
# all paths are relative to $PMSCO_WORK_DIR or (better) absolute.
#
#
# Further arguments
#
# PMSCO_JOBNAME (required)
# the job name is the base name for output files.
#
# PMSCO_WALLTIME_HR (integer, required)
# wall time limit in hours. must be integer, minimum 1.
# this value is passed to PMSCO.
# it should specify the same amount of wall time as requested from the scheduler.
#
# PMSCO_MODE (optional)
# calculation mode: single, swarm, grid, gradient
#
# PMSCO_CODE (optional)
# calculation code: edac, msc, test
#
# PMSCO_LOGLEVEL (optional)
# request log level: DEBUG, INFO, WARNING, ERROR
# create a log file based on the job name.
#
# PMSCO_PROJECT_ARGS (optional)
# extra arguments that are parsed by the project module.
#
#SBATCH --job-name="_PMSCO_JOBNAME"
#SBATCH --output="_PMSCO_JOBNAME.o.%j"
#SBATCH --error="_PMSCO_JOBNAME.e.%j"
PMSCO_WORK_DIR="_PMSCO_WORK_DIR"
PMSCO_JOBNAME="_PMSCO_JOBNAME"
PMSCO_WALLTIME_HR=_PMSCO_WALLTIME_HR
PMSCO_PROJECT_FILE="_PMSCO_PROJECT_FILE"
PMSCO_MODE="_PMSCO_MODE"
PMSCO_CODE="_PMSCO_CODE"
PMSCO_SOURCE_DIR="_PMSCO_SOURCE_DIR"
PMSCO_SCAN_FILES="_PMSCO_SCAN_FILES"
PMSCO_OUT="_PMSCO_JOBNAME"
PMSCO_LOGLEVEL="_PMSCO_LOGLEVEL"
PMSCO_PROJECT_ARGS="_PMSCO_PROJECT_ARGS"
module load psi-python36/4.4.0
module load gcc/4.8.5
module load openmpi/3.1.3
source activate pmsco3
echo '================================================================================'
echo "=== Running $0 at the following time and place:"
date
/bin/hostname
cd $PMSCO_WORK_DIR
pwd
ls -lA
#the intel compiler is currently not compatible with mpi4py. -mm 170131
#echo
#echo '================================================================================'
#echo "=== Setting the environment to use Intel Cluster Studio XE 2016 Update 2 intel/16.2:"
#cmd="source /opt/psi/Programming/intel/16.2/bin/compilervars.sh intel64"
#echo $cmd
#$cmd
echo
echo '================================================================================'
echo "=== The environment is set as following:"
env
echo
echo '================================================================================'
echo "BEGIN test"
echo "=== Intel native mpirun will get the number of nodes and the machinefile from Slurm"
which mpirun
cmd="mpirun /bin/hostname"
echo $cmd
$cmd
echo "END test"
echo
echo '================================================================================'
echo "BEGIN mpirun pmsco"
echo "Intel native mpirun will get the number of nodes and the machinefile from Slurm"
echo
echo "code revision"
cd "$PMSCO_SOURCE_DIR"
git log --pretty=tformat:'%h %ai %d' -1
python -m compileall pmsco
python -m compileall projects
cd "$PMSCO_WORK_DIR"
echo
PMSCO_CMD="python $PMSCO_SOURCE_DIR/pmsco $PMSCO_PROJECT_FILE"
PMSCO_ARGS="$PMSCO_PROJECT_ARGS"
if [ -n "$PMSCO_SCAN_FILES" ]; then
PMSCO_ARGS="-s $PMSCO_SCAN_FILES $PMSCO_ARGS"
fi
if [ -n "$PMSCO_CODE" ]; then
PMSCO_ARGS="-c $PMSCO_CODE $PMSCO_ARGS"
fi
if [ -n "$PMSCO_MODE" ]; then
PMSCO_ARGS="-m $PMSCO_MODE $PMSCO_ARGS"
fi
if [ -n "$PMSCO_OUT" ]; then
PMSCO_ARGS="-o $PMSCO_OUT $PMSCO_ARGS"
fi
if [ "$PMSCO_WALLTIME_HR" -ge 1 ]; then
PMSCO_ARGS="-t $PMSCO_WALLTIME_HR $PMSCO_ARGS"
fi
if [ -n "$PMSCO_LOGLEVEL" ]; then
PMSCO_ARGS="--log-level $PMSCO_LOGLEVEL --log-file $PMSCO_JOBNAME.log $PMSCO_ARGS"
fi
which mpirun
ls -l "$PMSCO_SOURCE_DIR"
ls -l "$PMSCO_PROJECT_FILE"
# Do no use the OpenMPI specific options, like "-x LD_LIBRARY_PATH", with the Intel mpirun.
cmd="mpirun $PMSCO_CMD $PMSCO_ARGS"
echo $cmd
$cmd
echo "END mpirun pmsco"
echo '================================================================================'
date
ls -lAtr
echo '================================================================================'
exit 0


@@ -1,178 +0,0 @@
#!/bin/bash
#
# SGE script template for MSC calculations
#
# This script uses the tight integration of openmpi-1.4.5-gcc-4.6.3 in SGE
# using the parallel environment (PE) "orte".
# This script must be used only with qsub command - do NOT run it as a stand-alone
# shell script because it will start all processes on the local node.
#
# PhD arguments
# copy this template to a new file, and set the arguments
#
# PHD_WORK_DIR
# path to be used as working directory.
# contains the SGE script derived from this template.
# receives output and temporary files.
#
# PHD_PROJECT_FILE
# python module that declares the project and starts the calculation.
# must include the file path relative to $PHD_WORK_DIR.
#
# PHD_SOURCE_DIR
# path to the pmsco source directory
# (the directory which contains the bin, lib, pmsco sub-directories)
#
# PHD_SCAN_FILES
# list of scan files.
#
# PHD_OUT
# name of output file. should not include a path.
#
# all paths are relative to $PHD_WORK_DIR or (better) absolute.
#
#
# Further arguments
#
# PHD_JOBNAME (required)
# the job name is the base name for output files.
#
# PHD_NODES (required)
# number of computing nodes (processes) to allocate for the job.
#
# PHD_WALLTIME_HR (required)
# wall time limit (hours)
#
# PHD_WALLTIME_MIN (required)
# wall time limit (minutes)
#
# PHD_MODE (optional)
# calculation mode: single, swarm, grid, gradient
#
# PHD_CODE (optional)
# calculation code: edac, msc, test
#
# PHD_LOGLEVEL (optional)
# request log level: DEBUG, INFO, WARNING, ERROR
# create a log file based on the job name.
#
# PHD_PROJECT_ARGS (optional)
# extra arguments that are parsed by the project module.
#
PHD_WORK_DIR="_PHD_WORK_DIR"
PHD_JOBNAME="_PHD_JOBNAME"
PHD_NODES=_PHD_NODES
PHD_WALLTIME_HR=_PHD_WALLTIME_HR
PHD_WALLTIME_MIN=_PHD_WALLTIME_MIN
PHD_PROJECT_FILE="_PHD_PROJECT_FILE"
PHD_MODE="_PHD_MODE"
PHD_CODE="_PHD_CODE"
PHD_SOURCE_DIR="_PHD_SOURCE_DIR"
PHD_SCAN_FILES="_PHD_SCAN_FILES"
PHD_OUT="_PHD_JOBNAME"
PHD_LOGLEVEL="_PHD_LOGLEVEL"
PHD_PROJECT_ARGS="_PHD_PROJECT_ARGS"
# Define your job name, parallel environment with the number of slots, and run time:
#$ -cwd
#$ -N _PHD_JOBNAME.job
#$ -pe orte _PHD_NODES
#$ -l ram=2G
#$ -l s_rt=_PHD_WALLTIME_HR:_PHD_WALLTIME_MIN:00
#$ -l h_rt=_PHD_WALLTIME_HR:_PHD_WALLTIME_MIN:30
#$ -V
###################################################
# Fix the SGE environment-handling bug (bash):
source /usr/share/Modules/init/sh
export -n -f module
# Load the environment modules for this job (the order may be important):
module load python/python-2.7.5
module load gcc/gcc-4.6.3
module load mpi/openmpi-1.4.5-gcc-4.6.3
module load blas/blas-20110419-gcc-4.6.3
module load lapack/lapack-3.4.2-gcc-4.6.3
export LD_LIBRARY_PATH=$PHD_SOURCE_DIR/lib/:$LD_LIBRARY_PATH
###################################################
# Set the environment variables:
MPIEXEC=$OPENMPI/bin/mpiexec
# OPENMPI is set by the mpi/openmpi-* module.
export OMP_NUM_THREADS=1
export OMPI_MCA_btl='openib,sm,self'
# export OMPI_MCA_orte_process_binding=core
##############
# BEGIN DEBUG
# Print the SGE environment on master host:
echo "================================================================"
echo "=== SGE job JOB_NAME=$JOB_NAME JOB_ID=$JOB_ID"
echo "================================================================"
echo DATE=`date`
echo HOSTNAME=`hostname`
echo PWD=`pwd`
echo "NSLOTS=$NSLOTS"
echo "PE_HOSTFILE=$PE_HOSTFILE"
cat $PE_HOSTFILE
echo "================================================================"
echo "Running environment:"
env
echo "================================================================"
echo "Loaded environment modules:"
module list 2>&1
echo
# END DEBUG
##############
##############
# Setup
cd "$PHD_SOURCE_DIR"
python -m compileall .
cd "$PHD_WORK_DIR"
ulimit -c 0
###################################################
# The command to run with mpiexec:
CMD="python $PHD_PROJECT_FILE"
ARGS="$PHD_PROJECT_ARGS"
if [ -n "$PHD_SCAN_FILES" ]; then
ARGS="-s $PHD_SCAN_FILES -- $ARGS"
fi
if [ -n "$PHD_CODE" ]; then
ARGS="-c $PHD_CODE $ARGS"
fi
if [ -n "$PHD_MODE" ]; then
ARGS="-m $PHD_MODE $ARGS"
fi
if [ -n "$PHD_OUT" ]; then
ARGS="-o $PHD_OUT $ARGS"
fi
if [ "$PHD_WALLTIME_HR" -ge 1 ]
then
ARGS="-t $PHD_WALLTIME_HR $ARGS"
else
ARGS="-t 0.5 $ARGS"
fi
if [ -n "$PHD_LOGLEVEL" ]; then
ARGS="--log-level $PHD_LOGLEVEL --log-file $PHD_JOBNAME.log $ARGS"
fi
# The MPI command to run:
MPICMD="$MPIEXEC --prefix $OPENMPI -x PATH -x LD_LIBRARY_PATH -x OMP_NUM_THREADS -x OMPI_MCA_btl -np $NSLOTS $CMD $ARGS"
echo "Command to run:"
echo "$MPICMD"
echo
exec $MPICMD
exit 0


@@ -1,145 +0,0 @@
#!/bin/sh
#
# submission script for PMSCO calculations on the Ra cluster
#
# this version clones the current git repository at HEAD to the work directory.
# thus, version conflicts between jobs are avoided.
#
if [ $# -lt 1 ]; then
echo "Usage: $0 [NOSUB] GIT_TAG DESTDIR JOBNAME NODES TASKS_PER_NODE WALLTIME:HOURS PROJECT [ARGS [ARGS [...]]]"
echo ""
echo " NOSUB (optional): do not submit the script to the queue. default: submit."
echo " GIT_TAG: git tag or branch name of the code. HEAD for current code."
echo " DESTDIR: destination directory. must exist. a sub-dir \$JOBNAME is created."
echo " JOBNAME (text): name of job. use only alphanumeric characters, no spaces."
echo " NODES (integer): number of computing nodes. (1 node = 24 or 32 processors)."
echo " do not specify more than 2."
echo " TASKS_PER_NODE (integer): 1...24, or 32."
echo " 24 or 32 for full-node allocation."
echo " 1...23 for shared node allocation."
echo " WALLTIME:HOURS (integer): requested wall time."
echo " 1...24 for day partition"
echo " 24...192 for week partition"
echo " 1...192 for shared partition"
echo " PROJECT: python module (file path) that declares the project and starts the calculation."
echo " ARGS (optional): any number of further PMSCO or project arguments (except time)."
echo ""
echo "the job script is written to \$DESTDIR/\$JOBNAME which is also the destination of calculation output."
exit 1
fi
# location of the pmsco package is derived from the path of this script
SCRIPTDIR="$(dirname $(readlink -f $0))"
SOURCEDIR="$(readlink -f $SCRIPTDIR/..)"
PMSCO_SOURCE_DIR="$SOURCEDIR"
# read arguments
if [ "$1" == "NOSUB" ]; then
NOSUB="true"
shift
else
NOSUB="false"
fi
if [ "$1" == "HEAD" ]; then
BRANCH_ARG=""
else
BRANCH_ARG="-b $1"
fi
shift
DEST_DIR="$1"
shift
PMSCO_JOBNAME=$1
shift
PMSCO_NODES=$1
PMSCO_TASKS_PER_NODE=$2
PMSCO_TASKS=$(expr $PMSCO_NODES \* $PMSCO_TASKS_PER_NODE)
shift 2
PMSCO_WALLTIME_HR=$1
PMSCO_WALLTIME_MIN=$(expr $PMSCO_WALLTIME_HR \* 60)
shift
# select partition
if [ $PMSCO_WALLTIME_HR -ge 25 ]; then
PMSCO_PARTITION="week"
else
PMSCO_PARTITION="day"
fi
if [ $PMSCO_TASKS_PER_NODE -lt 24 ]; then
PMSCO_PARTITION="shared"
fi
PMSCO_PROJECT_FILE="$(readlink -f $1)"
shift
PMSCO_PROJECT_ARGS="$*"
# set up working directory
cd "$DEST_DIR"
if [ ! -d "$PMSCO_JOBNAME" ]; then
mkdir "$PMSCO_JOBNAME"
fi
cd "$PMSCO_JOBNAME"
WORKDIR="$(pwd)"
PMSCO_WORK_DIR="$WORKDIR"
# copy code
PMSCO_SOURCE_REPO="file://$PMSCO_SOURCE_DIR"
echo "$PMSCO_SOURCE_REPO"
cd "$PMSCO_WORK_DIR"
git clone $BRANCH_ARG --single-branch --depth 1 $PMSCO_SOURCE_REPO pmsco || exit
cd pmsco
PMSCO_REV=$(git log --pretty=format:"%h, %ai" -1) || exit
cd "$WORKDIR"
echo "$PMSCO_REV" > revision.txt
# generate job script from template
sed -e "s:_PMSCO_WORK_DIR:$PMSCO_WORK_DIR:g" \
-e "s:_PMSCO_JOBNAME:$PMSCO_JOBNAME:g" \
-e "s:_PMSCO_NODES:$PMSCO_NODES:g" \
-e "s:_PMSCO_WALLTIME_HR:$PMSCO_WALLTIME_HR:g" \
-e "s:_PMSCO_PROJECT_FILE:$PMSCO_PROJECT_FILE:g" \
-e "s:_PMSCO_PROJECT_ARGS:$PMSCO_PROJECT_ARGS:g" \
"$SCRIPTDIR/pmsco.ra-git.template" > $PMSCO_JOBNAME.job
chmod u+x "$PMSCO_JOBNAME.job" || exit
# request nodes and tasks
#
# The option --ntasks-per-node is meant to be used with the --nodes option.
# (For the --ntasks option, the default is one task per node, use the --cpus-per-task option to change this default.)
#
# sbatch options
# --cores-per-socket=16
# 32 cores per node
# --partition=[shared|day|week]
# --time=8-00:00:00
# override default time limit (2 days in long queue)
# time formats: "minutes", "minutes:seconds", "hours:minutes:seconds", "days-hours", "days-hours:minutes", "days-hours:minutes:seconds"
# --mail-type=ALL
# --test-only
# check script but do not submit
#
SLURM_ARGS="--nodes=$PMSCO_NODES --ntasks-per-node=$PMSCO_TASKS_PER_NODE"
if [ $PMSCO_TASKS_PER_NODE -gt 24 ]; then
SLURM_ARGS="--cores-per-socket=16 $SLURM_ARGS"
fi
SLURM_ARGS="--partition=$PMSCO_PARTITION $SLURM_ARGS"
SLURM_ARGS="--time=$PMSCO_WALLTIME_HR:00:00 $SLURM_ARGS"
CMD="sbatch $SLURM_ARGS $PMSCO_JOBNAME.job"
echo $CMD
if [ "$NOSUB" != "true" ]; then
$CMD
fi
exit 0


@@ -1,151 +0,0 @@
#!/bin/sh
#
# submission script for PMSCO calculations on the Ra cluster
#
# CAUTION: the job will execute the pmsco code which is present in the directory tree
# of this script _at the time of job execution_, not submission!
# before changing the code, make sure that all pending jobs have started execution,
# otherwise you will experience version conflicts.
# it's better to use the qpmsco.ra-git.sh script which clones the code.
if [ $# -lt 1 ]; then
echo "Usage: $0 [NOSUB] DESTDIR JOBNAME NODES TASKS_PER_NODE WALLTIME:HOURS PROJECT MODE [ARGS [ARGS [...]]]"
echo ""
echo " NOSUB (optional): do not submit the script to the queue. default: submit."
echo " DESTDIR: destination directory. must exist. a sub-dir \$JOBNAME is created."
echo " JOBNAME (text): name of job. use only alphanumeric characters, no spaces."
echo " NODES (integer): number of computing nodes. (1 node = 24 or 32 processors)."
echo " do not specify more than 2."
echo " TASKS_PER_NODE (integer): 1...24, or 32."
echo " 24 or 32 for full-node allocation."
echo " 1...23 for shared node allocation."
echo " WALLTIME:HOURS (integer): requested wall time."
echo " 1...24 for day partition"
echo " 24...192 for week partition"
echo " 1...192 for shared partition"
echo " PROJECT: python module (file path) that declares the project and starts the calculation."
echo " MODE: PMSCO calculation mode (single|swarm|gradient|grid)."
echo " ARGS (optional): any number of further PMSCO or project arguments (except mode and time)."
echo ""
echo "the job script is written to \$DESTDIR/\$JOBNAME which is also the destination of calculation output."
exit 1
fi
# location of the pmsco package is derived from the path of this script
SCRIPTDIR="$(dirname $(readlink -f $0))"
SOURCEDIR="$SCRIPTDIR/.."
PMSCO_SOURCE_DIR="$SOURCEDIR"
# read arguments
if [ "$1" == "NOSUB" ]; then
NOSUB="true"
shift
else
NOSUB="false"
fi
DEST_DIR="$1"
shift
PMSCO_JOBNAME=$1
shift
PMSCO_NODES=$1
PMSCO_TASKS_PER_NODE=$2
PMSCO_TASKS=$(expr $PMSCO_NODES \* $PMSCO_TASKS_PER_NODE)
shift 2
PMSCO_WALLTIME_HR=$1
PMSCO_WALLTIME_MIN=$(expr $PMSCO_WALLTIME_HR \* 60)
shift
# select partition
if [ $PMSCO_WALLTIME_HR -ge 25 ]; then
PMSCO_PARTITION="week"
else
PMSCO_PARTITION="day"
fi
if [ $PMSCO_TASKS_PER_NODE -lt 24 ]; then
PMSCO_PARTITION="shared"
fi
PMSCO_PROJECT_FILE="$(readlink -f $1)"
shift
PMSCO_MODE="$1"
shift
PMSCO_PROJECT_ARGS="$*"
# use defaults, override explicitly in PMSCO_PROJECT_ARGS if necessary
PMSCO_SCAN_FILES=""
PMSCO_LOGLEVEL=""
PMSCO_CODE=""
# set up working directory
cd "$DEST_DIR"
if [ ! -d "$PMSCO_JOBNAME" ]; then
mkdir "$PMSCO_JOBNAME"
fi
cd "$PMSCO_JOBNAME"
WORKDIR="$(pwd)"
PMSCO_WORK_DIR="$WORKDIR"
# provide revision information, requires git repository
cd "$SOURCEDIR"
PMSCO_REV=$(git log --pretty=format:"%h, %ai" -1)
if [ $? -ne 0 ]; then
PMSCO_REV="revision unknown, "$(date +"%F %T %z")
fi
cd "$WORKDIR"
echo "$PMSCO_REV" > revision.txt
# generate job script from template
sed -e "s:_PMSCO_WORK_DIR:$PMSCO_WORK_DIR:g" \
-e "s:_PMSCO_JOBNAME:$PMSCO_JOBNAME:g" \
-e "s:_PMSCO_NODES:$PMSCO_NODES:g" \
-e "s:_PMSCO_WALLTIME_HR:$PMSCO_WALLTIME_HR:g" \
-e "s:_PMSCO_PROJECT_FILE:$PMSCO_PROJECT_FILE:g" \
-e "s:_PMSCO_PROJECT_ARGS:$PMSCO_PROJECT_ARGS:g" \
-e "s:_PMSCO_CODE:$PMSCO_CODE:g" \
-e "s:_PMSCO_MODE:$PMSCO_MODE:g" \
-e "s:_PMSCO_SOURCE_DIR:$PMSCO_SOURCE_DIR:g" \
-e "s:_PMSCO_SCAN_FILES:$PMSCO_SCAN_FILES:g" \
-e "s:_PMSCO_LOGLEVEL:$PMSCO_LOGLEVEL:g" \
"$SCRIPTDIR/pmsco.ra.template" > $PMSCO_JOBNAME.job
chmod u+x "$PMSCO_JOBNAME.job"
# request nodes and tasks
#
# The option --ntasks-per-node is meant to be used with the --nodes option.
# (For the --ntasks option, the default is one task per node, use the --cpus-per-task option to change this default.)
#
# sbatch options
# --cores-per-socket=16
# 32 cores per node
# --partition=[shared|day|week]
# --time=8-00:00:00
# override default time limit (2 days in long queue)
# time formats: "minutes", "minutes:seconds", "hours:minutes:seconds", "days-hours", "days-hours:minutes", "days-hours:minutes:seconds"
# --mail-type=ALL
# --test-only
# check script but do not submit
#
SLURM_ARGS="--nodes=$PMSCO_NODES --ntasks-per-node=$PMSCO_TASKS_PER_NODE"
if [ $PMSCO_TASKS_PER_NODE -gt 24 ]; then
SLURM_ARGS="--cores-per-socket=16 $SLURM_ARGS"
fi
SLURM_ARGS="--partition=$PMSCO_PARTITION $SLURM_ARGS"
SLURM_ARGS="--time=$PMSCO_WALLTIME_HR:00:00 $SLURM_ARGS"
CMD="sbatch $SLURM_ARGS $PMSCO_JOBNAME.job"
echo $CMD
if [ "$NOSUB" != "true" ]; then
$CMD
fi
exit 0


@@ -1,128 +0,0 @@
#!/bin/sh
#
# submission script for PMSCO calculations on Merlin cluster
#
if [ $# -lt 1 ]; then
echo "Usage: $0 [NOSUB] JOBNAME NODES WALLTIME:HOURS PROJECT MODE [LOG_LEVEL]"
echo ""
echo " NOSUB (optional): do not submit the script to the queue. default: submit."
echo " WALLTIME:HOURS (integer): sets the wall time limits."
echo " soft limit = HOURS:00:00"
echo " hard limit = HOURS:00:30"
echo " for short.q: HOURS = 0 (-> MINUTES=30)"
echo " for all.q: HOURS <= 24"
echo " for long.q: HOURS <= 96"
echo " PROJECT: python module (file path) that declares the project and starts the calculation."
echo " MODE: PMSCO calculation mode (single|swarm|gradient|grid)."
echo " LOG_LEVEL (optional): one of DEBUG, INFO, WARNING, ERROR if log files should be produced."
echo ""
echo "the job script complete with the program code and input/output data is generated in ~/jobs/\$JOBNAME"
exit 1
fi
# location of the pmsco package is derived from the path of this script
SCRIPTDIR="$(dirname $(readlink -f $0))"
SOURCEDIR="$SCRIPTDIR/.."
PHD_SOURCE_DIR="$SOURCEDIR"
PHD_CODE="edac"
# read arguments
if [ "$1" == "NOSUB" ]; then
NOSUB="true"
shift
else
NOSUB="false"
fi
PHD_JOBNAME=$1
shift
PHD_NODES=$1
shift
PHD_WALLTIME_HR=$1
PHD_WALLTIME_MIN=0
shift
PHD_PROJECT_FILE="$(readlink -f $1)"
PHD_PROJECT_ARGS=""
shift
PHD_MODE="$1"
shift
PHD_LOGLEVEL=""
if [ "$1" == "DEBUG" ] || [ "$1" == "INFO" ] || [ "$1" == "WARNING" ] || [ "$1" == "ERROR" ]; then
PHD_LOGLEVEL="$1"
shift
fi
# ignore remaining arguments
PHD_SCAN_FILES=""
# select allowed queues
QUEUE=short.q,all.q,long.q
# for short queue (limit 30 minutes)
if [ "$PHD_WALLTIME_HR" -lt 1 ]; then
PHD_WALLTIME_HR=0
PHD_WALLTIME_MIN=30
fi
# set up working directory
cd ~
if [ ! -d "jobs" ]; then
mkdir jobs
fi
cd jobs
if [ ! -d "$PHD_JOBNAME" ]; then
mkdir "$PHD_JOBNAME"
fi
cd "$PHD_JOBNAME"
WORKDIR="$(pwd)"
PHD_WORK_DIR="$WORKDIR"
# provide revision information, requires git repository
cd "$SOURCEDIR"
PHD_REV=$(git log --pretty=format:"%h, %ad" --date=iso -1)
if [ $? -ne 0 ]; then
PHD_REV="revision unknown, "$(date +"%F %T %z")
fi
cd "$WORKDIR"
echo "$PHD_REV" > revision.txt
# generate job script from template
sed -e "s:_PHD_WORK_DIR:$PHD_WORK_DIR:g" \
-e "s:_PHD_JOBNAME:$PHD_JOBNAME:g" \
-e "s:_PHD_NODES:$PHD_NODES:g" \
-e "s:_PHD_WALLTIME_HR:$PHD_WALLTIME_HR:g" \
-e "s:_PHD_WALLTIME_MIN:$PHD_WALLTIME_MIN:g" \
-e "s:_PHD_PROJECT_FILE:$PHD_PROJECT_FILE:g" \
-e "s:_PHD_PROJECT_ARGS:$PHD_PROJECT_ARGS:g" \
-e "s:_PHD_CODE:$PHD_CODE:g" \
-e "s:_PHD_MODE:$PHD_MODE:g" \
-e "s:_PHD_SOURCE_DIR:$PHD_SOURCE_DIR:g" \
-e "s:_PHD_SCAN_FILES:$PHD_SCAN_FILES:g" \
-e "s:_PHD_LOGLEVEL:$PHD_LOGLEVEL:g" \
"$SCRIPTDIR/pmsco.sge.template" > $PHD_JOBNAME.job
chmod u+x "$PHD_JOBNAME.job"
if [ "$NOSUB" != "true" ]; then
# suppress bash error [stackoverflow.com/questions/10496758]
unset module
# submit the job script
# EMAIL must be defined in the environment
if [ -n "$EMAIL" ]; then
qsub -q $QUEUE -m ae -M $EMAIL $PHD_JOBNAME.job
else
qsub -q $QUEUE $PHD_JOBNAME.job
fi
fi
exit 0


@@ -1,4 +1,4 @@
# Doxyfile 1.8.9.1
# Doxyfile 1.9.1
# This file describes the settings to be used by the documentation system
# doxygen (www.doxygen.org) for a project.
@@ -17,11 +17,11 @@
# Project related configuration options
#---------------------------------------------------------------------------
# This tag specifies the encoding used for all characters in the config file
# that follow. The default is UTF-8 which is also the encoding used for all text
# before the first occurrence of this tag. Doxygen uses libiconv (or the iconv
# built into libc) for the transcoding. See http://www.gnu.org/software/libiconv
# for the list of possible encodings.
# This tag specifies the encoding used for all characters in the configuration
# file that follow. The default is UTF-8 which is also the encoding used for all
# text before the first occurrence of this tag. Doxygen uses libiconv (or the
# iconv built into libc) for the transcoding. See
# https://www.gnu.org/software/libiconv/ for the list of possible encodings.
# The default value is: UTF-8.
DOXYFILE_ENCODING = UTF-8
@@ -32,7 +32,7 @@ DOXYFILE_ENCODING = UTF-8
# title of most generated pages and in a few other places.
# The default value is: My Project.
PROJECT_NAME = "PEARL MSCO"
PROJECT_NAME = PMSCO
# The PROJECT_NUMBER tag can be used to enter a project or revision number. This
# could be handy for archiving the generated documentation or if some version
@@ -44,7 +44,7 @@ PROJECT_NUMBER = $(REVISION)
# for a project that appears at the top of each page and should give viewer a
# quick idea about the purpose of the project. Keep the description short.
PROJECT_BRIEF = "PEARL multiple scattering calculation and optimization"
PROJECT_BRIEF = "PSI multiple scattering calculation and optimization"
# With the PROJECT_LOGO tag one can specify a logo or an icon that is included
# in the documentation. The maximum height of the logo should not exceed 55
@@ -58,7 +58,7 @@ PROJECT_LOGO =
# entered, it will be relative to the location where doxygen was started. If
# left blank the current directory will be used.
OUTPUT_DIRECTORY =
OUTPUT_DIRECTORY = $(OUTDIR)
# If the CREATE_SUBDIRS tag is set to YES then doxygen will create 4096 sub-
# directories (in 2 levels) under the output directory of each output format and
@@ -93,6 +93,14 @@ ALLOW_UNICODE_NAMES = NO
OUTPUT_LANGUAGE = English
# The OUTPUT_TEXT_DIRECTION tag is used to specify the direction in which all
# documentation generated by doxygen is written. Doxygen will use this
# information to generate all generated output in the proper direction.
# Possible values are: None, LTR, RTL and Context.
# The default value is: None.
OUTPUT_TEXT_DIRECTION = None
# If the BRIEF_MEMBER_DESC tag is set to YES, doxygen will include brief member
# descriptions after the members that are listed in the file and class
# documentation (similar to Javadoc). Set to NO to disable this.
@@ -179,6 +187,16 @@ SHORT_NAMES = NO
JAVADOC_AUTOBRIEF = YES
# If the JAVADOC_BANNER tag is set to YES then doxygen will interpret a line
# such as
# /***************
# as being the beginning of a Javadoc-style comment "banner". If set to NO, the
# Javadoc-style will behave just like regular comments and it will not be
# interpreted by doxygen.
# The default value is: NO.
JAVADOC_BANNER = NO
# If the QT_AUTOBRIEF tag is set to YES then doxygen will interpret the first
# line (until the first dot) of a Qt-style comment as the brief description. If
# set to NO, the Qt-style will behave just like regular Qt-style comments (thus
@@ -199,6 +217,14 @@ QT_AUTOBRIEF = NO
MULTILINE_CPP_IS_BRIEF = NO
# By default Python docstrings are displayed as preformatted text and doxygen's
# special commands cannot be used. By setting PYTHON_DOCSTRING to NO the
# doxygen's special commands can be used and the contents of the docstring
# documentation blocks is shown as doxygen documentation.
# The default value is: YES.
PYTHON_DOCSTRING = NO
# If the INHERIT_DOCS tag is set to YES then an undocumented member inherits the
# documentation from any documented member that it re-implements.
# The default value is: YES.
@@ -226,15 +252,14 @@ TAB_SIZE = 4
# will allow you to put the command \sideeffect (or @sideeffect) in the
# documentation, which will result in a user-defined paragraph with heading
# "Side Effects:". You can put \n's in the value part of an alias to insert
# newlines.
# newlines (in the resulting output). You can put ^^ in the value part of an
# alias to insert a newline as if a physical newline was in the original file.
# When you need a literal { or } or , in the value part of an alias you have to
# escape them by means of a backslash (\), this can lead to conflicts with the
# commands \{ and \} for these it is advised to use the version @{ and @} or use
# a double escape (\\{ and \\})
ALIASES = "raise=@exception"
# This tag can be used to specify a number of word-keyword mappings (TCL only).
# A mapping has the form "name=value". For example adding "class=itcl::class"
# will allow you to use the command class in the itcl::class meaning.
TCL_SUBST =
ALIASES = raise=@exception
# Set the OPTIMIZE_OUTPUT_FOR_C tag to YES if your project consists of C sources
# only. Doxygen will then generate output that is more tailored for C. For
@@ -264,28 +289,40 @@ OPTIMIZE_FOR_FORTRAN = NO
OPTIMIZE_OUTPUT_VHDL = NO
# Set the OPTIMIZE_OUTPUT_SLICE tag to YES if your project consists of Slice
# sources only. Doxygen will then generate output that is more tailored for that
# language. For instance, namespaces will be presented as modules, types will be
# separated into more groups, etc.
# The default value is: NO.
OPTIMIZE_OUTPUT_SLICE = NO
# Doxygen selects the parser to use depending on the extension of the files it
# parses. With this tag you can assign which parser to use for a given
# extension. Doxygen has a built-in mapping, but you can override or extend it
# using this tag. The format is ext=language, where ext is a file extension, and
# language is one of the parsers supported by doxygen: IDL, Java, Javascript,
# C#, C, C++, D, PHP, Objective-C, Python, Fortran (fixed format Fortran:
# FortranFixed, free formatted Fortran: FortranFree, unknown formatted Fortran:
# Fortran. In the later case the parser tries to guess whether the code is fixed
# or free formatted code, this is the default for Fortran type files), VHDL. For
# instance to make doxygen treat .inc files as Fortran files (default is PHP),
# and .f files as C (default is Fortran), use: inc=Fortran f=C.
# language is one of the parsers supported by doxygen: IDL, Java, JavaScript,
# Csharp (C#), C, C++, D, PHP, md (Markdown), Objective-C, Python, Slice, VHDL,
# Fortran (fixed format Fortran: FortranFixed, free formatted Fortran:
# FortranFree, unknown formatted Fortran: Fortran. In the latter case the parser
# tries to guess whether the code is fixed or free formatted code, this is the
# default for Fortran type files). For instance to make doxygen treat .inc files
# as Fortran files (default is PHP), and .f files as C (default is Fortran),
# use: inc=Fortran f=C.
#
# Note: For files without extension you can use no_extension as a placeholder.
#
# Note that for custom extensions you also need to set FILE_PATTERNS otherwise
# the files are not read by doxygen.
# the files are not read by doxygen. When specifying no_extension you should add
# * to the FILE_PATTERNS.
#
# Note see also the list of default file extension mappings.
EXTENSION_MAPPING =
# If the MARKDOWN_SUPPORT tag is enabled then doxygen pre-processes all comments
# according to the Markdown format, which allows for more readable
# documentation. See http://daringfireball.net/projects/markdown/ for details.
# documentation. See https://daringfireball.net/projects/markdown/ for details.
# The output of markdown processing is further processed by doxygen, so you can
# mix doxygen, HTML, and XML commands with Markdown formatting. Disable only in
# case of backward compatibilities issues.
@@ -293,6 +330,15 @@ EXTENSION_MAPPING =
MARKDOWN_SUPPORT = YES
# When the TOC_INCLUDE_HEADINGS tag is set to a non-zero value, all headings up
# to that level are automatically included in the table of contents, even if
# they do not have an id attribute.
# Note: This feature currently applies only to Markdown headings.
# Minimum value: 0, maximum value: 99, default value: 5.
# This tag requires that the tag MARKDOWN_SUPPORT is set to YES.
TOC_INCLUDE_HEADINGS = 5
# When enabled doxygen tries to link words that correspond to documented
# classes, or namespaces to their corresponding documentation. Such a link can
# be prevented in individual cases by putting a % sign in front of the word or
@@ -318,7 +364,7 @@ BUILTIN_STL_SUPPORT = NO
CPP_CLI_SUPPORT = NO
# Set the SIP_SUPPORT tag to YES if your project consists of sip (see:
# http://www.riverbankcomputing.co.uk/software/sip/intro) sources only. Doxygen
# https://www.riverbankcomputing.com/software/sip/intro) sources only. Doxygen
# will parse them like normal C++ but will assume all classes use public instead
# of private inheritance when no explicit protection keyword is present.
# The default value is: NO.
@@ -343,6 +389,13 @@ IDL_PROPERTY_SUPPORT = YES
DISTRIBUTE_GROUP_DOC = NO
# If one adds a struct or class to a group and this option is enabled, then also
# any nested class or struct is added to the same group. By default this option
# is disabled and one has to add nested compounds explicitly via \ingroup.
# The default value is: NO.
GROUP_NESTED_COMPOUNDS = NO
# Set the SUBGROUPING tag to YES to allow class member groups of the same type
# (for instance a group of public functions) to be put as a subgroup of that
# type (e.g. under the Public Functions section). Set it to NO to prevent
@@ -397,6 +450,19 @@ TYPEDEF_HIDES_STRUCT = NO
LOOKUP_CACHE_SIZE = 0
# The NUM_PROC_THREADS specifies the number of threads doxygen is allowed to use
# during processing. When set to 0 doxygen will base this on the number of
# cores available in the system. You can set it explicitly to a value larger
# than 0 to get more control over the balance between CPU load and processing
# speed. At this moment only the input processing can be done using multiple
# threads. Since this is still an experimental feature the default is set to 1,
# which effectively disables parallel processing. Please report any issues you
# encounter. Generating dot graphs in parallel is controlled by the
# DOT_NUM_THREADS setting.
# Minimum value: 0, maximum value: 32, default value: 1.
NUM_PROC_THREADS = 1
#---------------------------------------------------------------------------
# Build related configuration options
#---------------------------------------------------------------------------
@@ -417,6 +483,12 @@ EXTRACT_ALL = YES
EXTRACT_PRIVATE = YES
# If the EXTRACT_PRIV_VIRTUAL tag is set to YES, documented private virtual
# methods of a class will be included in the documentation.
# The default value is: NO.
EXTRACT_PRIV_VIRTUAL = NO
# If the EXTRACT_PACKAGE tag is set to YES, all members with package or internal
# scope will be included in the documentation.
# The default value is: NO.
@@ -454,6 +526,13 @@ EXTRACT_LOCAL_METHODS = YES
EXTRACT_ANON_NSPACES = NO
# If this flag is set to YES, the name of an unnamed parameter in a declaration
# will be determined by the corresponding definition. By default unnamed
# parameters remain unnamed in the output.
# The default value is: YES.
RESOLVE_UNNAMED_PARAMS = YES
# If the HIDE_UNDOC_MEMBERS tag is set to YES, doxygen will hide all
# undocumented members inside documented classes or files. If set to NO these
# members will be included in the various overviews, but no documentation
@@ -471,8 +550,8 @@ HIDE_UNDOC_MEMBERS = NO
HIDE_UNDOC_CLASSES = NO
# If the HIDE_FRIEND_COMPOUNDS tag is set to YES, doxygen will hide all friend
# (class|struct|union) declarations. If set to NO, these declarations will be
# included in the documentation.
# declarations. If set to NO, these declarations will be included in the
# documentation.
# The default value is: NO.
HIDE_FRIEND_COMPOUNDS = NO
@@ -491,11 +570,18 @@ HIDE_IN_BODY_DOCS = NO
INTERNAL_DOCS = NO
# If the CASE_SENSE_NAMES tag is set to NO then doxygen will only generate file
# names in lower-case letters. If set to YES, upper-case letters are also
# allowed. This is useful if you have classes or files whose names only differ
# in case and if your file system supports case sensitive file names. Windows
# and Mac users are advised to set this option to NO.
# With the correct setting of option CASE_SENSE_NAMES doxygen will better be
# able to match the capabilities of the underlying filesystem. In case the
# filesystem is case sensitive (i.e. it supports files in the same directory
# whose names only differ in casing), the option must be set to YES to properly
# deal with such files in case they appear in the input. For filesystems that
# are not case sensitive the option should be set to NO to properly deal with
# output files written for symbols that only differ in casing, such as for two
# classes, one named CLASS and the other named Class, and to also support
# references to files without having to specify the exact matching casing. On
# Windows (including Cygwin) and MacOS, users should typically set this option
# to NO, whereas on Linux or other Unix flavors it should typically be set to
# YES.
# The default value is: system dependent.
CASE_SENSE_NAMES = YES
@@ -682,7 +768,7 @@ LAYOUT_FILE =
# The CITE_BIB_FILES tag can be used to specify one or more bib files containing
# the reference definitions. This must be a list of .bib files. The .bib
# extension is automatically appended if omitted. This requires the bibtex tool
# to be installed. See also http://en.wikipedia.org/wiki/BibTeX for more info.
# to be installed. See also https://en.wikipedia.org/wiki/BibTeX for more info.
# For LaTeX the style of the bibliography can be controlled using
# LATEX_BIB_STYLE. To use this feature you need bibtex and perl available in the
# search path. See also \cite for info how to create references.
@@ -727,11 +813,21 @@ WARN_IF_DOC_ERROR = YES
# This WARN_NO_PARAMDOC option can be enabled to get warnings for functions that
# are documented, but have no documentation for their parameters or return
# value. If set to NO, doxygen will only warn about wrong or incomplete
# parameter documentation, but not about the absence of documentation.
# parameter documentation, but not about the absence of documentation. If
# EXTRACT_ALL is set to YES then this flag will automatically be disabled.
# The default value is: NO.
WARN_NO_PARAMDOC = NO
# If the WARN_AS_ERROR tag is set to YES then doxygen will immediately stop when
# a warning is encountered. If the WARN_AS_ERROR tag is set to FAIL_ON_WARNINGS
# then doxygen will continue running as if WARN_AS_ERROR tag is set to NO, but
# at the end of the doxygen process doxygen will return with a non-zero status.
# Possible values are: NO, YES and FAIL_ON_WARNINGS.
# The default value is: NO.
WARN_AS_ERROR = NO
# The WARN_FORMAT tag determines the format of the warning messages that doxygen
# can produce. The string should contain the $file, $line, and $text tags, which
# will be replaced by the file and line number from which the warning originated
@@ -755,40 +851,49 @@ WARN_LOGFILE =
# The INPUT tag is used to specify the files and/or directories that contain
# documented source files. You may enter file names like myfile.cpp or
# directories like /usr/src/myproject. Separate the files or directories with
# spaces.
# spaces. See also FILE_PATTERNS and EXTENSION_MAPPING
# Note: If this tag is empty the current directory is searched.
INPUT = \
src/introduction.dox \
INPUT = src/introduction.dox \
src/concepts.dox \
src/concepts-tasks.dox \
src/concepts-emitter.dox \
src/concepts-atomscat.dox \
src/installation.dox \
src/project.dox \
src/execution.dox \
src/commandline.dox \
src/runfile.dox \
src/optimizers.dox \
../pmsco \
../projects \
../tests
src/reports.dox \
../pmsco
# This tag can be used to specify the character encoding of the source files
# that doxygen parses. Internally doxygen uses the UTF-8 encoding. Doxygen uses
# libiconv (or the iconv built into libc) for the transcoding. See the libiconv
# documentation (see: http://www.gnu.org/software/libiconv) for the list of
# possible encodings.
# documentation (see:
# https://www.gnu.org/software/libiconv/) for the list of possible encodings.
# The default value is: UTF-8.
INPUT_ENCODING = UTF-8
# If the value of the INPUT tag contains directories, you can use the
# FILE_PATTERNS tag to specify one or more wildcard patterns (like *.cpp and
# *.h) to filter out the source-files in the directories. If left blank the
# following patterns are tested:*.c, *.cc, *.cxx, *.cpp, *.c++, *.java, *.ii,
# *.ixx, *.ipp, *.i++, *.inl, *.idl, *.ddl, *.odl, *.h, *.hh, *.hxx, *.hpp,
# *.h++, *.cs, *.d, *.php, *.php4, *.php5, *.phtml, *.inc, *.m, *.markdown,
# *.md, *.mm, *.dox, *.py, *.f90, *.f, *.for, *.tcl, *.vhd, *.vhdl, *.ucf,
# *.qsf, *.as and *.js.
# *.h) to filter out the source-files in the directories.
#
# Note that for custom extensions or not directly supported extensions you also
# need to set EXTENSION_MAPPING for the extension otherwise the files are not
# read by doxygen.
#
# Note the list of default checked file patterns might differ from the list of
# default file extension mappings.
#
# If left blank the following patterns are tested:*.c, *.cc, *.cxx, *.cpp,
# *.c++, *.java, *.ii, *.ixx, *.ipp, *.i++, *.inl, *.idl, *.ddl, *.odl, *.h,
# *.hh, *.hxx, *.hpp, *.h++, *.cs, *.d, *.php, *.php4, *.php5, *.phtml, *.inc,
# *.m, *.markdown, *.md, *.mm, *.dox (to be provided as doxygen C comment),
# *.py, *.pyw, *.f90, *.f95, *.f03, *.f08, *.f18, *.f, *.for, *.vhd, *.vhdl,
# *.ucf, *.qsf and *.ice.
FILE_PATTERNS = *.py \
*.dox
@@ -806,10 +911,7 @@ RECURSIVE = YES
# Note that relative paths are relative to the directory from which doxygen is
# run.
EXCLUDE = ../pmsco/edac \
../pmsco/loess \
../pmsco/msc \
../pmsco/mufpot
EXCLUDE = ../pmsco/projects
# The EXCLUDE_SYMLINKS tag can be used to select whether or not files or
# directories that are symbolic links (a Unix file system feature) are excluded
@@ -879,6 +981,10 @@ IMAGE_PATH = src/images
# Note that the filter must not add or remove lines; it is applied before the
# code is scanned, but not when the output code is generated. If lines are added
# or removed, the anchors will not be placed correctly.
#
# Note that for custom extensions or not directly supported extensions you also
# need to set EXTENSION_MAPPING for the extension otherwise the files are not
# properly processed by doxygen.
INPUT_FILTER =
@@ -888,8 +994,12 @@ INPUT_FILTER =
# (like *.cpp=my_cpp_filter). See INPUT_FILTER for further information on how
# filters are used. If the FILTER_PATTERNS tag is empty or if none of the
# patterns match the file name, INPUT_FILTER is applied.
#
# Note that for custom extensions or not directly supported extensions you also
# need to set EXTENSION_MAPPING for the extension otherwise the files are not
# properly processed by doxygen.
FILTER_PATTERNS = *.py=/usr/bin/doxypy
FILTER_PATTERNS = *.py=./py_filter.sh
# If the FILTER_SOURCE_FILES tag is set to YES, the input filter (if set using
# INPUT_FILTER) will also be used to filter the input files that are used for
@@ -940,7 +1050,7 @@ INLINE_SOURCES = NO
STRIP_CODE_COMMENTS = YES
# If the REFERENCED_BY_RELATION tag is set to YES then for each documented
# function all documented functions referencing it will be listed.
# entity all documented functions referencing it will be listed.
# The default value is: NO.
REFERENCED_BY_RELATION = NO
@@ -972,12 +1082,12 @@ SOURCE_TOOLTIPS = YES
# If the USE_HTAGS tag is set to YES then the references to source code will
# point to the HTML generated by the htags(1) tool instead of doxygen built-in
# source browser. The htags tool is part of GNU's global source tagging system
# (see http://www.gnu.org/software/global/global.html). You will need version
# (see https://www.gnu.org/software/global/global.html). You will need version
# 4.8.6 or higher.
#
# To use it do the following:
# - Install the latest version of global
# - Enable SOURCE_BROWSER and USE_HTAGS in the config file
# - Enable SOURCE_BROWSER and USE_HTAGS in the configuration file
# - Make sure the INPUT points to the root of the source tree
# - Run doxygen as normal
#
@@ -1000,16 +1110,22 @@ USE_HTAGS = NO
VERBATIM_HEADERS = YES
# If the CLANG_ASSISTED_PARSING tag is set to YES then doxygen will use the
# clang parser (see: http://clang.llvm.org/) for more accurate parsing at the
# cost of reduced performance. This can be particularly helpful with template
# rich C++ code for which doxygen's built-in parser lacks the necessary type
# information.
# clang parser (see:
# http://clang.llvm.org/) for more accurate parsing at the cost of reduced
# performance. This can be particularly helpful with template rich C++ code for
# which doxygen's built-in parser lacks the necessary type information.
# Note: The availability of this option depends on whether or not doxygen was
# compiled with the --with-libclang option.
# generated with the -Duse_libclang=ON option for CMake.
# The default value is: NO.
CLANG_ASSISTED_PARSING = NO
# If clang assisted parsing is enabled and the CLANG_ADD_INC_PATHS tag is set to
# YES then doxygen will add the directory of each input to the include path.
# The default value is: YES.
CLANG_ADD_INC_PATHS = YES
# If clang assisted parsing is enabled you can provide the compiler with command
# line options that you would normally use when invoking the compiler. Note that
# the include paths will already be set by doxygen for the files and directories
@@ -1018,6 +1134,19 @@ CLANG_ASSISTED_PARSING = NO
CLANG_OPTIONS =
# If clang assisted parsing is enabled you can provide the clang parser with the
# path to the directory containing a file called compile_commands.json. This
# file is the compilation database (see:
# http://clang.llvm.org/docs/HowToSetupToolingForLLVM.html) containing the
# options used when the source files were built. This is equivalent to
# specifying the -p option to a clang tool, such as clang-check. These options
# will then be passed to the parser. Any options specified with CLANG_OPTIONS
# will be added as well.
# Note: The availability of this option depends on whether or not doxygen was
# generated with the -Duse_libclang=ON option for CMake.
CLANG_DATABASE_PATH =
#---------------------------------------------------------------------------
# Configuration options related to the alphabetical class index
#---------------------------------------------------------------------------
@@ -1029,13 +1158,6 @@ CLANG_OPTIONS =
ALPHABETICAL_INDEX = YES
# The COLS_IN_ALPHA_INDEX tag can be used to specify the number of columns in
# which the alphabetical index list will be split.
# Minimum value: 1, maximum value: 20, default value: 5.
# This tag requires that the tag ALPHABETICAL_INDEX is set to YES.
COLS_IN_ALPHA_INDEX = 8
# In case all classes in a project start with a common prefix, all classes will
# be put under the same header in the alphabetical index. The IGNORE_PREFIX tag
# can be used to specify a prefix (or a list of prefixes) that should be ignored
@@ -1136,7 +1258,7 @@ HTML_EXTRA_FILES =
# The HTML_COLORSTYLE_HUE tag controls the color of the HTML output. Doxygen
# will adjust the colors in the style sheet and background images according to
# this color. Hue is specified as an angle on a colorwheel, see
# http://en.wikipedia.org/wiki/Hue for more information. For instance the value
# https://en.wikipedia.org/wiki/Hue for more information. For instance the value
# 0 represents red, 60 is yellow, 120 is green, 180 is cyan, 240 is blue, 300
# purple, and 360 is red again.
# Minimum value: 0, maximum value: 359, default value: 220.
@@ -1165,12 +1287,24 @@ HTML_COLORSTYLE_GAMMA = 80
# If the HTML_TIMESTAMP tag is set to YES then the footer of each generated HTML
# page will contain the date and time when the page was generated. Setting this
# to NO can help when comparing the output of multiple runs.
# The default value is: YES.
# to YES can help to show when doxygen was last run and thus if the
# documentation is up to date.
# The default value is: NO.
# This tag requires that the tag GENERATE_HTML is set to YES.
HTML_TIMESTAMP = YES
# If the HTML_DYNAMIC_MENUS tag is set to YES then the generated HTML
# documentation will contain a main index with vertical navigation menus that
# are dynamically created via JavaScript. If disabled, the navigation index will
# consists of multiple levels of tabs that are statically embedded in every HTML
# page. Disable this option to support browsers that do not have JavaScript,
# like the Qt help browser.
# The default value is: YES.
# This tag requires that the tag GENERATE_HTML is set to YES.
HTML_DYNAMIC_MENUS = YES
# If the HTML_DYNAMIC_SECTIONS tag is set to YES then the generated HTML
# documentation will contain sections that can be hidden and shown after the
# page has loaded.
@@ -1194,13 +1328,14 @@ HTML_INDEX_NUM_ENTRIES = 100
# If the GENERATE_DOCSET tag is set to YES, additional index files will be
# generated that can be used as input for Apple's Xcode 3 integrated development
# environment (see: http://developer.apple.com/tools/xcode/), introduced with
# OSX 10.5 (Leopard). To create a documentation set, doxygen will generate a
# Makefile in the HTML output directory. Running make will produce the docset in
# that directory and running make install will install the docset in
# environment (see:
# https://developer.apple.com/xcode/), introduced with OSX 10.5 (Leopard). To
# create a documentation set, doxygen will generate a Makefile in the HTML
# output directory. Running make will produce the docset in that directory and
# running make install will install the docset in
# ~/Library/Developer/Shared/Documentation/DocSets so that Xcode will find it at
# startup. See http://developer.apple.com/tools/creatingdocsetswithdoxygen.html
# for more information.
# startup. See https://developer.apple.com/library/archive/featuredarticles/Doxy
# genXcode/_index.html for more information.
# The default value is: NO.
# This tag requires that the tag GENERATE_HTML is set to YES.
@@ -1239,8 +1374,8 @@ DOCSET_PUBLISHER_NAME = Publisher
# If the GENERATE_HTMLHELP tag is set to YES then doxygen generates three
# additional HTML index files: index.hhp, index.hhc, and index.hhk. The
# index.hhp is a project file that can be read by Microsoft's HTML Help Workshop
# (see: http://www.microsoft.com/en-us/download/details.aspx?id=21138) on
# Windows.
# (see:
# https://www.microsoft.com/en-us/download/details.aspx?id=21138) on Windows.
#
# The HTML Help Workshop contains a compiler that can convert all HTML output
# generated by doxygen into a single compiled HTML file (.chm). Compiled HTML
@@ -1270,7 +1405,7 @@ CHM_FILE =
HHC_LOCATION =
# The GENERATE_CHI flag controls if a separate .chi index file is generated
# (YES) or that it should be included in the master .chm file (NO).
# (YES) or that it should be included in the main .chm file (NO).
# The default value is: NO.
# This tag requires that the tag GENERATE_HTMLHELP is set to YES.
@@ -1315,7 +1450,8 @@ QCH_FILE =
# The QHP_NAMESPACE tag specifies the namespace to use when generating Qt Help
# Project output. For more information please see Qt Help Project / Namespace
# (see: http://qt-project.org/doc/qt-4.8/qthelpproject.html#namespace).
# (see:
# https://doc.qt.io/archives/qt-4.8/qthelpproject.html#namespace).
# The default value is: org.doxygen.Project.
# This tag requires that the tag GENERATE_QHP is set to YES.
@@ -1323,8 +1459,8 @@ QHP_NAMESPACE = org.doxygen.Project
# The QHP_VIRTUAL_FOLDER tag specifies the namespace to use when generating Qt
# Help Project output. For more information please see Qt Help Project / Virtual
# Folders (see: http://qt-project.org/doc/qt-4.8/qthelpproject.html#virtual-
# folders).
# Folders (see:
# https://doc.qt.io/archives/qt-4.8/qthelpproject.html#virtual-folders).
# The default value is: doc.
# This tag requires that the tag GENERATE_QHP is set to YES.
@@ -1332,30 +1468,30 @@ QHP_VIRTUAL_FOLDER = doc
# If the QHP_CUST_FILTER_NAME tag is set, it specifies the name of a custom
# filter to add. For more information please see Qt Help Project / Custom
# Filters (see: http://qt-project.org/doc/qt-4.8/qthelpproject.html#custom-
# filters).
# Filters (see:
# https://doc.qt.io/archives/qt-4.8/qthelpproject.html#custom-filters).
# This tag requires that the tag GENERATE_QHP is set to YES.
QHP_CUST_FILTER_NAME =
# The QHP_CUST_FILTER_ATTRS tag specifies the list of the attributes of the
# custom filter to add. For more information please see Qt Help Project / Custom
# Filters (see: http://qt-project.org/doc/qt-4.8/qthelpproject.html#custom-
# filters).
# Filters (see:
# https://doc.qt.io/archives/qt-4.8/qthelpproject.html#custom-filters).
# This tag requires that the tag GENERATE_QHP is set to YES.
QHP_CUST_FILTER_ATTRS =
# The QHP_SECT_FILTER_ATTRS tag specifies the list of the attributes this
# project's filter section matches. Qt Help Project / Filter Attributes (see:
# http://qt-project.org/doc/qt-4.8/qthelpproject.html#filter-attributes).
# https://doc.qt.io/archives/qt-4.8/qthelpproject.html#filter-attributes).
# This tag requires that the tag GENERATE_QHP is set to YES.
QHP_SECT_FILTER_ATTRS =
# The QHG_LOCATION tag can be used to specify the location of Qt's
# qhelpgenerator. If non-empty doxygen will try to run qhelpgenerator on the
# generated .qhp file.
# The QHG_LOCATION tag can be used to specify the location (absolute path
# including file name) of Qt's qhelpgenerator. If non-empty doxygen will try to
# run qhelpgenerator on the generated .qhp file.
# This tag requires that the tag GENERATE_QHP is set to YES.
QHG_LOCATION =
@@ -1432,6 +1568,17 @@ TREEVIEW_WIDTH = 250
EXT_LINKS_IN_WINDOW = NO
# If the HTML_FORMULA_FORMAT option is set to svg, doxygen will use the pdf2svg
# tool (see https://github.com/dawbarton/pdf2svg) or inkscape (see
# https://inkscape.org) to generate formulas as SVG images instead of PNGs for
# the HTML output. These images will generally look nicer at scaled resolutions.
# Possible values are: png (the default) and svg (looks nicer but requires the
# pdf2svg or inkscape tool).
# The default value is: png.
# This tag requires that the tag GENERATE_HTML is set to YES.
HTML_FORMULA_FORMAT = png
# Use this tag to change the font size of LaTeX formulas included as images in
# the HTML documentation. When you change the font size after a successful
# doxygen run you need to manually remove any form_*.png images from the HTML
@@ -1441,7 +1588,7 @@ EXT_LINKS_IN_WINDOW = NO
FORMULA_FONTSIZE = 10
# Use the FORMULA_TRANPARENT tag to determine whether or not the images
# Use the FORMULA_TRANSPARENT tag to determine whether or not the images
# generated for formulas are transparent PNGs. Transparent PNGs are not
# supported properly for IE 6.0, but are supported on all modern browsers.
#
@@ -1452,8 +1599,14 @@ FORMULA_FONTSIZE = 10
FORMULA_TRANSPARENT = YES
# The FORMULA_MACROFILE can contain LaTeX \newcommand and \renewcommand commands
# to create new LaTeX commands to be used in formulas as building blocks. See
# the section "Including formulas" for details.
FORMULA_MACROFILE =
# Enable the USE_MATHJAX option to render LaTeX formulas using MathJax (see
# http://www.mathjax.org) which uses client side Javascript for the rendering
# https://www.mathjax.org) which uses client side JavaScript for the rendering
# instead of using pre-rendered bitmaps. Use this if you do not have LaTeX
# installed or if you want the formulas to look prettier in the HTML output. When
# enabled you may also need to install MathJax separately and configure the path
@@ -1465,7 +1618,7 @@ USE_MATHJAX = NO
# When MathJax is enabled you can set the default output format to be used for
# the MathJax output. See the MathJax site (see:
# http://docs.mathjax.org/en/latest/output.html) for more details.
# http://docs.mathjax.org/en/v2.7-latest/output.html) for more details.
# Possible values are: HTML-CSS (which is slower, but has the best
# compatibility), NativeMML (i.e. MathML) and SVG.
# The default value is: HTML-CSS.
@@ -1480,8 +1633,8 @@ MATHJAX_FORMAT = HTML-CSS
# MATHJAX_RELPATH should be ../mathjax. The default value points to the MathJax
# Content Delivery Network so you can quickly see the result without installing
# MathJax. However, it is strongly recommended to install a local copy of
# MathJax from http://www.mathjax.org before deployment.
# The default value is: http://cdn.mathjax.org/mathjax/latest.
# MathJax from https://www.mathjax.org before deployment.
# The default value is: https://cdn.jsdelivr.net/npm/mathjax@2.
# This tag requires that the tag USE_MATHJAX is set to YES.
MATHJAX_RELPATH = http://cdn.mathjax.org/mathjax/latest
@@ -1495,7 +1648,8 @@ MATHJAX_EXTENSIONS =
# The MATHJAX_CODEFILE tag can be used to specify a file with javascript pieces
# of code that will be used on startup of the MathJax code. See the MathJax site
# (see: http://docs.mathjax.org/en/latest/output.html) for more details. For an
# (see:
# http://docs.mathjax.org/en/v2.7-latest/output.html) for more details. For an
# example see the documentation.
# This tag requires that the tag USE_MATHJAX is set to YES.
@@ -1523,7 +1677,7 @@ MATHJAX_CODEFILE =
SEARCHENGINE = YES
# When the SERVER_BASED_SEARCH tag is enabled the search engine will be
# implemented using a web server instead of a web client using Javascript. There
# implemented using a web server instead of a web client using JavaScript. There
# are two flavors of web server based searching depending on the EXTERNAL_SEARCH
# setting. When disabled, doxygen will generate a PHP script for searching and
# an index file used by the script. When EXTERNAL_SEARCH is enabled the indexing
@@ -1542,7 +1696,8 @@ SERVER_BASED_SEARCH = NO
#
# Doxygen ships with an example indexer (doxyindexer) and search engine
# (doxysearch.cgi) which are based on the open source search engine library
# Xapian (see: http://xapian.org/).
# Xapian (see:
# https://xapian.org/).
#
# See the section "External Indexing and Searching" for details.
# The default value is: NO.
@@ -1555,8 +1710,9 @@ EXTERNAL_SEARCH = NO
#
# Doxygen ships with an example indexer (doxyindexer) and search engine
# (doxysearch.cgi) which are based on the open source search engine library
# Xapian (see: http://xapian.org/). See the section "External Indexing and
# Searching" for details.
# Xapian (see:
# https://xapian.org/). See the section "External Indexing and Searching" for
# details.
# This tag requires that the tag SEARCHENGINE is set to YES.
SEARCHENGINE_URL =
@@ -1594,7 +1750,7 @@ EXTRA_SEARCH_MAPPINGS =
# If the GENERATE_LATEX tag is set to YES, doxygen will generate LaTeX output.
# The default value is: YES.
GENERATE_LATEX = YES
GENERATE_LATEX = NO
# The LATEX_OUTPUT tag is used to specify where the LaTeX docs will be put. If a
# relative path is entered the value of OUTPUT_DIRECTORY will be put in front of
@@ -1607,21 +1763,35 @@ LATEX_OUTPUT = latex
# The LATEX_CMD_NAME tag can be used to specify the LaTeX command name to be
# invoked.
#
# Note that when enabling USE_PDFLATEX this option is only used for generating
# bitmaps for formulas in the HTML output, but not in the Makefile that is
# written to the output directory.
# The default file is: latex.
# Note that when not enabling USE_PDFLATEX the default is latex; when enabling
# USE_PDFLATEX the default is pdflatex, and when in the latter case latex is
# chosen this is overwritten by pdflatex. For specific output languages the
# default can have been set differently, this depends on the implementation of
# the output language.
# This tag requires that the tag GENERATE_LATEX is set to YES.
LATEX_CMD_NAME = latex
# The MAKEINDEX_CMD_NAME tag can be used to specify the command name to generate
# index for LaTeX.
# Note: This tag is used in the Makefile / make.bat.
# See also: LATEX_MAKEINDEX_CMD for the part in the generated output file
# (.tex).
# The default file is: makeindex.
# This tag requires that the tag GENERATE_LATEX is set to YES.
MAKEINDEX_CMD_NAME = makeindex
# The LATEX_MAKEINDEX_CMD tag can be used to specify the command name to
# generate index for LaTeX. In case there is no backslash (\) as first character
# it will be automatically added in the LaTeX code.
# Note: This tag is used in the generated output file (.tex).
# See also: MAKEINDEX_CMD_NAME for the part in the Makefile / make.bat.
# The default value is: makeindex.
# This tag requires that the tag GENERATE_LATEX is set to YES.
LATEX_MAKEINDEX_CMD = makeindex
# If the COMPACT_LATEX tag is set to YES, doxygen generates more compact LaTeX
# documents. This may be useful for small projects and may help to save some
# trees in general.
@@ -1640,9 +1810,12 @@ COMPACT_LATEX = NO
PAPER_TYPE = a4
# The EXTRA_PACKAGES tag can be used to specify one or more LaTeX package names
# that should be included in the LaTeX output. To get the times font for
# instance you can specify
# EXTRA_PACKAGES=times
# that should be included in the LaTeX output. The package can be specified just
# by its name or with the correct syntax as to be used with the LaTeX
# \usepackage command. To get the times font for instance you can specify:
# EXTRA_PACKAGES=times or EXTRA_PACKAGES={times}
# To use the option intlimits with the amsmath package you can specify:
# EXTRA_PACKAGES=[intlimits]{amsmath}
# If left blank no extra packages will be included.
# This tag requires that the tag GENERATE_LATEX is set to YES.
@@ -1703,9 +1876,11 @@ LATEX_EXTRA_FILES =
PDF_HYPERLINKS = YES
# If the USE_PDFLATEX tag is set to YES, doxygen will use pdflatex to generate
# the PDF file directly from the LaTeX files. Set this option to YES, to get a
# higher quality PDF documentation.
# If the USE_PDFLATEX tag is set to YES, doxygen will use the engine as
# specified with LATEX_CMD_NAME to generate the PDF file directly from the LaTeX
# files. Set this option to YES, to get a higher quality PDF documentation.
#
# See also section LATEX_CMD_NAME for selecting the engine.
# The default value is: YES.
# This tag requires that the tag GENERATE_LATEX is set to YES.
@@ -1739,12 +1914,28 @@ LATEX_SOURCE_CODE = NO
# The LATEX_BIB_STYLE tag can be used to specify the style to use for the
# bibliography, e.g. plainnat, or ieeetr. See
# http://en.wikipedia.org/wiki/BibTeX and \cite for more info.
# https://en.wikipedia.org/wiki/BibTeX and \cite for more info.
# The default value is: plain.
# This tag requires that the tag GENERATE_LATEX is set to YES.
LATEX_BIB_STYLE = plain
# If the LATEX_TIMESTAMP tag is set to YES then the footer of each generated
# page will contain the date and time when the page was generated. Setting this
# to NO can help when comparing the output of multiple runs.
# The default value is: NO.
# This tag requires that the tag GENERATE_LATEX is set to YES.
LATEX_TIMESTAMP = NO
# The LATEX_EMOJI_DIRECTORY tag is used to specify the (relative or absolute)
# path from which the emoji images will be read. If a relative path is entered,
# it will be relative to the LATEX_OUTPUT directory. If left blank the
# LATEX_OUTPUT directory will be used.
# This tag requires that the tag GENERATE_LATEX is set to YES.
LATEX_EMOJI_DIRECTORY =
#---------------------------------------------------------------------------
# Configuration options related to the RTF output
#---------------------------------------------------------------------------
@@ -1784,9 +1975,9 @@ COMPACT_RTF = NO
RTF_HYPERLINKS = NO
# Load stylesheet definitions from file. Syntax is similar to doxygen's config
# file, i.e. a series of assignments. You only have to provide replacements,
# missing definitions are set to their default value.
# Load stylesheet definitions from file. Syntax is similar to doxygen's
# configuration file, i.e. a series of assignments. You only have to provide
# replacements, missing definitions are set to their default value.
#
# See also section "Doxygen usage" for information on how to generate the
# default style sheet that doxygen normally uses.
@@ -1795,8 +1986,8 @@ RTF_HYPERLINKS = NO
RTF_STYLESHEET_FILE =
# Set optional variables used in the generation of an RTF document. Syntax is
# similar to doxygen's config file. A template extensions file can be generated
# using doxygen -e rtf extensionFile.
# similar to doxygen's configuration file. A template extensions file can be
# generated using doxygen -e rtf extensionFile.
# This tag requires that the tag GENERATE_RTF is set to YES.
RTF_EXTENSIONS_FILE =
@@ -1882,6 +2073,13 @@ XML_OUTPUT = xml
XML_PROGRAMLISTING = YES
# If the XML_NS_MEMB_FILE_SCOPE tag is set to YES, doxygen will include
# namespace members in file scope as well, matching the HTML output.
# The default value is: NO.
# This tag requires that the tag GENERATE_XML is set to YES.
XML_NS_MEMB_FILE_SCOPE = NO
#---------------------------------------------------------------------------
# Configuration options related to the DOCBOOK output
#---------------------------------------------------------------------------
@@ -1914,9 +2112,9 @@ DOCBOOK_PROGRAMLISTING = NO
#---------------------------------------------------------------------------
# If the GENERATE_AUTOGEN_DEF tag is set to YES, doxygen will generate an
# AutoGen Definitions (see http://autogen.sf.net) file that captures the
# structure of the code including all documentation. Note that this feature is
# still experimental and incomplete at the moment.
# AutoGen Definitions (see http://autogen.sourceforge.net/) file that captures
# the structure of the code including all documentation. Note that this feature
# is still experimental and incomplete at the moment.
# The default value is: NO.
GENERATE_AUTOGEN_DEF = NO
@@ -2083,12 +2281,6 @@ EXTERNAL_GROUPS = YES
EXTERNAL_PAGES = YES
# The PERL_PATH should be the absolute path and name of the perl script
# interpreter (i.e. the result of 'which perl').
# The default file (with absolute path) is: /usr/bin/perl.
PERL_PATH = /usr/bin/perl
#---------------------------------------------------------------------------
# Configuration options related to the dot tool
#---------------------------------------------------------------------------
@@ -2102,15 +2294,6 @@ PERL_PATH = /usr/bin/perl
CLASS_DIAGRAMS = YES
# You can define message sequence charts within doxygen comments using the \msc
# command. Doxygen will then run the mscgen tool (see:
# http://www.mcternan.me.uk/mscgen/)) to produce the chart and insert it in the
# documentation. The MSCGEN_PATH tag allows you to specify the directory where
# the mscgen tool resides. If left empty the tool is assumed to be found in the
# default search path.
MSCGEN_PATH =
# You can include diagrams made with dia in doxygen documentation. Doxygen will
# then run dia to produce the diagram and insert it in the documentation. The
# DIA_PATH tag allows you to specify the directory where the dia binary resides.
@@ -2208,10 +2391,32 @@ UML_LOOK = YES
# but if the number exceeds 15, the total amount of fields shown is limited to
# 10.
# Minimum value: 0, maximum value: 100, default value: 10.
# This tag requires that the tag HAVE_DOT is set to YES.
# This tag requires that the tag UML_LOOK is set to YES.
UML_LIMIT_NUM_FIELDS = 10
# If the DOT_UML_DETAILS tag is set to NO, doxygen will show attributes and
# methods without types and arguments in the UML graphs. If the DOT_UML_DETAILS
# tag is set to YES, doxygen will add type and arguments for attributes and
# methods in the UML graphs. If the DOT_UML_DETAILS tag is set to NONE, doxygen
# will not generate fields with class member information in the UML graphs. The
# class diagrams will look similar to the default class diagrams but using UML
# notation for the relationships.
# Possible values are: NO, YES and NONE.
# The default value is: NO.
# This tag requires that the tag UML_LOOK is set to YES.
DOT_UML_DETAILS = NO
# The DOT_WRAP_THRESHOLD tag can be used to set the maximum number of characters
# to display on a single line. If the actual line length exceeds this threshold
# significantly it will be wrapped across multiple lines. Some heuristics are
# applied to avoid ugly line breaks.
# Minimum value: 0, maximum value: 1000, default value: 17.
# This tag requires that the tag HAVE_DOT is set to YES.
DOT_WRAP_THRESHOLD = 17
# If the TEMPLATE_RELATIONS tag is set to YES then the inheritance and
# collaboration graphs will show the relations between templates and their
# instances.
@@ -2243,7 +2448,8 @@ INCLUDED_BY_GRAPH = YES
#
# Note that enabling this option will significantly increase the time of a run.
# So in most cases it will be better to enable call graphs for selected
# functions only using the \callgraph command.
# functions only using the \callgraph command. Disabling a call graph can be
# accomplished by means of the command \hidecallgraph.
# The default value is: NO.
# This tag requires that the tag HAVE_DOT is set to YES.
@@ -2254,7 +2460,8 @@ CALL_GRAPH = NO
#
# Note that enabling this option will significantly increase the time of a run.
# So in most cases it will be better to enable caller graphs for selected
# functions only using the \callergraph command.
# functions only using the \callergraph command. Disabling a caller graph can be
# accomplished by means of the command \hidecallergraph.
# The default value is: NO.
# This tag requires that the tag HAVE_DOT is set to YES.
@@ -2277,13 +2484,17 @@ GRAPHICAL_HIERARCHY = YES
DIRECTORY_GRAPH = YES
# The DOT_IMAGE_FORMAT tag can be used to set the image format of the images
# generated by dot.
# generated by dot. For an explanation of the image formats see the section
# output formats in the documentation of the dot tool (Graphviz (see:
# http://www.graphviz.org/)).
# Note: If you choose svg you need to set HTML_FILE_EXTENSION to xhtml in order
# to make the SVG files visible in IE 9+ (other browsers do not have this
# requirement).
# Possible values are: png, png:cairo, png:cairo:cairo, png:cairo:gd, png:gd,
# png:gd:gd, jpg, jpg:cairo, jpg:cairo:gd, jpg:gd, jpg:gd:gd, gif, gif:cairo,
# gif:cairo:gd, gif:gd, gif:gd:gd and svg.
# gif:cairo:gd, gif:gd, gif:gd:gd, svg, png:gd, png:gd:gd, png:cairo,
# png:cairo:gd, png:cairo:cairo, png:cairo:gdiplus, png:gdiplus and
# png:gdiplus:gdiplus.
# The default value is: png.
# This tag requires that the tag HAVE_DOT is set to YES.
@@ -2334,6 +2545,11 @@ DIAFILE_DIRS =
PLANTUML_JAR_PATH = $(PLANTUML_JAR_PATH)
# When using plantuml, the PLANTUML_CFG_FILE tag can be used to specify a
# configuration file for plantuml.
PLANTUML_CFG_FILE =
# When using plantuml, the specified paths are searched for files specified by
# the !include statement in a plantuml block.
@@ -2392,9 +2608,11 @@ DOT_MULTI_TARGETS = NO
GENERATE_LEGEND = YES
# If the DOT_CLEANUP tag is set to YES, doxygen will remove the intermediate dot
# If the DOT_CLEANUP tag is set to YES, doxygen will remove the intermediate
# files that are used to generate the various graphs.
#
# Note: This setting is not only used for dot files but also for msc and
# plantuml temporary files.
# The default value is: YES.
# This tag requires that the tag HAVE_DOT is set to YES.
DOT_CLEANUP = YES


@@ -10,26 +10,25 @@ SHELL=/bin/sh
.SUFFIXES:
.SUFFIXES: .c .cpp .cxx .exe .f .h .i .o .py .pyf .so .html
.PHONY: all docs clean
.PHONY: all docs html clean
DOX=doxygen
DOXOPTS=
LATEX_DIR=latex
REVISION=$(shell git describe --always --tags --dirty --long || echo "unknown, "`date +"%F %T %z"`)
export REVISION
OUTDIR=
export OUTDIR
all: docs
all: html
docs: doxygen pdf
docs: html
doxygen:
$(DOX) $(DOXOPTS) config.dox
pdf: doxygen
-$(MAKE) -C $(LATEX_DIR)
html: doxygen
clean:
-rm -r latex/*
-rm -r html/*

2
docs/py_filter.sh Executable file

@@ -0,0 +1,2 @@
#!/bin/bash
python -m doxypypy.doxypypy -a -c $1

28
docs/readme.md Normal file

@@ -0,0 +1,28 @@
To compile the source code documentation in HTML format on Ubuntu, follow the instructions below.
~~~~~~{.sh}
apt-get update
apt-get install -y --no-install-recommends \
default-jre \
doxygen \
gawk \
git \
graphviz \
pandoc \
wget
pip install --no-cache-dir \
doxypypy \
meson \
meson-python \
ninja \
pynose
wget -O plantuml.jar https://sourceforge.net/projects/plantuml/files/plantuml.jar/download
export PLANTUML_JAR_PATH=/app/plantuml.jar
cd pmsco/docs
doxygen config.dox
~~~~~~
Open `pmsco/docs/html/index.html` in your browser.
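Alternatively, if the prerequisites above are already installed, the Makefile in `pmsco/docs` wraps the same doxygen call. A minimal sketch based on the targets shown in the Makefile diff above (the exact targets may differ in your checkout):
~~~~~~{.sh}
cd pmsco/docs
make          # default target, builds the HTML documentation via doxygen config.dox
make clean    # removes the generated html/ output
~~~~~~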


@@ -1,7 +0,0 @@
to compile the source code documentation, you need the following packages (naming according to Debian):
doxygen
doxygen-gui (optional)
doxypy
graphviz
latex (optional)


@@ -1,135 +1,48 @@
/*! @page pag_command Command Line
\section sec_command Command Line
This section describes the command line arguments for a direct call of PMSCO from the shell.
For batch job submission to Slurm see @ref sec_slurm.
Assuming that PMSCO has been installed in the active Python environment (@ref pag_install),
the basic command line of PMSCO is as follows:
Since PMSCO is started indirectly by a call of the specific project module,
the syntax of the command line arguments is defined by the project module.
However, to reduce the amount of custom code and documentation and to avoid confusion
it is recommended to adhere to the standard syntax described below.
~~~~~~{.sh}
[mpiexec -np NPROCESSES] python -m pmsco [options]
~~~~~~
The basic command line is as follows:
@code{.sh}
[mpiexec -np NPROCESSES] python path/to/pmsco path/to/project.py [common args] [project args]
@endcode
The first portion between square brackets is necessary for parallel execution using MPI.
Replace `NPROCESSES` by the number of processes.
Include the first portion between square brackets if you want to run parallel processes.
Specify the number of processes as the @c -np option.
@c path/to/pmsco is the directory where <code>__main__.py</code> is located.
Do not include the extension <code>.py</code> or a trailing slash.
@c path/to/project.py should be the path and name to your project module.
Common args and project args are described below.
The PMSCO main program has a limited number of `common arguments` that are described below.
Usually, all parameters should be declared in a @ref pag_runfile so that they can be archived with the results.
However, in some cases it may be necessary to override some common parameters, e.g. the job name, on the command line.
\subsection sec_common_args Common Arguments
\subsection sec_command_common Common Arguments
All common arguments are optional and default to more or less reasonable values if omitted.
They can be added to the command line in arbitrary order.
All common arguments can also be set in the project code or the run-file (recommended).
In that case, only the run-file is specified on the command line.
However, there are a number of options that override settings from the run-file.
The arguments can appear in arbitrary order.
The following table is ordered by importance.
| Option | Values | Description |
| --- | --- | --- |
| -h , --help | | Display a command line summary and exit. |
| -m , --mode | single (default), grid, swarm, genetic | Operation mode. |
| -d, --data-dir | file system path | Directory path for experimental data files (if required by project). Default: current working directory. |
| -o, --output-file | file system path | Base path and/or name for intermediate and output files. Default: pmsco_data |
| -t, --time-limit | decimal number | Wall time limit in hours. The optimizers try to finish before the limit. Default: 24.0. |
| -k, --keep-files | list of file categories | Output file categories to keep after the calculation. Multiple values can be specified and must be separated by spaces. By default, cluster and model (simulated data) of a limited number of best models are kept. See @ref sec_file_categories below. |
| --log-level | DEBUG, INFO, WARNING (default), ERROR, CRITICAL | Minimum level of messages that should be added to the log. |
| --log-file | file system path | Name of the main log file. Under MPI, the rank of the process is inserted before the extension. Default: output-file + log, or pmsco.log. |
| --log-disable | | Disable logging. By default, logging is on. |
| --pop-size | integer | Population size (number of particles) in swarm and genetic optimization mode. The default value is the greater of 4 or the number of parallel calculation processes. |
| --seed-file | file system path | Name of the population seed file. Population data of previous optimizations can be used to seed a new optimization. The file must have the same structure as the .pop or .dat files. See @ref pmsco.project.Project.seed_file. |
| --table-file | file system path | Name of the model table file in table scan mode. |
| Option | Values | Description | Run File |
| --- | --- | --- | --- |
| -r, --run-file | file path | JSON-formatted configuration file that defines run-time parameters. The format and content of a run file is described in a section @ref pag_runfile. | no |
| -o, --output-dir | file path | Base path and/or name for intermediate and output files. | see note below |
| -j , --job-name | string | Job name | job-name |
| -m, --module | file path | Project module | __module__ |
| -c, --project-class | string | Project class | __class__ |
| -h, --help | | Display a command line summary and exit. | no |
The job name is used as a prefix of output file names.
It is also registered in the `jobs` table of the results database (if used),
and it is used to identify the job with a job scheduling system.
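As an illustration (the run file path and job names below are placeholders, not part of the package), a typical invocation loads all settings from a run file and overrides only the job name:
~~~~~~{.sh}
# serial run: all parameters from the run file, job name overridden on the command line
python -m pmsco -r runs/twoatom.json -j twoatom0001

# the same run distributed over 8 MPI processes
mpiexec -np 8 python -m pmsco -r runs/twoatom.json -j twoatom0002
~~~~~~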
\subsubsection sec_file_categories File Categories
The following category names can be used with the `--keep-files` option.
Multiple names can be specified and must be separated by spaces.
| Category | Description | Default Action |
| --- | --- | --- |
| all | shortcut to include all categories | |
| input | raw input files for calculator, including cluster and phase files in custom format | delete |
| output | raw output files from calculator | delete |
| atomic | atomic scattering and emission files in portable format | delete |
| cluster | cluster files in portable XYZ format for report | keep |
| debug | debug files | delete |
| model | output files in ETPAI format: complete simulation (a_-1_-1_-1_-1) | keep |
| scan | output files in ETPAI format: scan (a_b_-1_-1_-1) | keep |
| symmetry | output files in ETPAI format: symmetry (a_b_c_-1_-1) | delete |
| emitter | output files in ETPAI format: emitter (a_b_c_d_-1) | delete |
| region | output files in ETPAI format: region (a_b_c_d_e) | delete |
| report | final report of results | keep always |
| population | final state of particle population | keep |
| rfac | files related to models which give bad r-factors, see warning below | delete |
\note
The `report` category is always kept and cannot be turned off.
The `model` category is always kept in single calculation mode.
\warning
If you want to specify `rfac` with the `--keep-files` option,
you have to add the file categories that you want to keep, e.g.,
`--keep-files rfac cluster model scan population`
(to return the default categories for all calculated models).
Do not specify `rfac` alone as this will effectively not return any file.
\subsection sec_project_args Project Arguments
The following table lists a few recommended options that are handled by the project code.
Project options that are not listed here should use the long form to avoid conflicts in future versions.
| Option | Values | Description |
| --- | --- | --- |
| -s, --scans | project-dependent | Nick names of scans to use in calculation. The nick name selects the experimental data file and the initial state of the photoelectron. Multiple values can be specified and must be separated by spaces. |
\subsection sec_scanfile Experimental Scan Files
The recommended way of specifying experimental scan files is using nick names (dictionary keys) and the @c --scans option.
A dictionary in the module code defines the corresponding file name, chemical species of the emitter and initial state of the photoelectron.
The location of the files is selected using the common @c --data-dir option.
This way, the file names and photoelectron parameters are versioned with the code,
whereas command line arguments may easily get forgotten in the records.
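For example (the scan nick names, project path and data directory below are illustrative placeholders; the valid nick names are defined by the dictionary in the particular project module):
@code{.sh}
python path/to/pmsco path/to/project.py -s s1 s2 -d path/to/data
@endcode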
\subsection sec_project_example Argument Handling
To handle command line arguments in a project module,
the module must define a <code>parse_project_args</code> and a <code>set_project_args</code> function.
An example can be found in the twoatom.py demo project.
\section sec_slurm Slurm Job Submission
The command line of the Slurm job submission script for the Ra cluster at PSI is as follows.
This script is specific to the configuration of the Ra cluster but may be adapted to other Slurm-based queues.
@code{.sh}
qpmsco.sh [NOSUB] DESTDIR JOBNAME NODES TASKS_PER_NODE WALLTIME:HOURS PROJECT MODE [ARGS [ARGS [...]]]
@endcode
Here, the first few arguments are positional and their order must be strictly adhered to.
After the positional arguments, optional arguments of the PMSCO project command line can be added in arbitrary order.
If you execute the script without arguments, it displays a short summary.
The job script is written to @c $DESTDIR/$JOBNAME which is also the destination of calculation output.
| Argument | Values | Description |
| --- | --- | --- |
| NOSUB (optional) | NOSUB or omitted | If NOSUB is present as the first argument, create the job script but do not submit it to the queue. Otherwise, submit the job script. |
| DESTDIR | file system path | Destination directory. Must exist. A sub-dir $JOBNAME is created. |
| JOBNAME | text | Name of job. Use only alphanumeric characters, no spaces. |
| NODES | integer | Number of computing nodes. (1 node = 24 or 32 processors). Do not specify more than 2. |
| TASKS_PER_NODE | 1...24, or 32 | Number of processes per node. 24 or 32 for full-node allocation. 1...23 for shared node allocation. |
| WALLTIME:HOURS | integer | Requested wall time. 1...24 for day partition, 24...192 for week partition, 1...192 for shared partition. This value is also passed on to PMSCO as the @c --time-limit argument. |
| PROJECT | file system path | Python module (file path) that declares the project and starts the calculation. |
| MODE | single, swarm, grid, genetic | PMSCO operation mode. This value is passed on to PMSCO as the @c --mode argument. |
| ARGS (optional) | | Any further arguments are passed on verbatim to PMSCO. You don't need to specify the mode and time limit here. |
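A sketch of a possible submission (destination directory, job name and project path are placeholders) requesting one full 24-core node for 12 hours in swarm mode:
@code{.sh}
# create the job script in $DESTDIR/$JOBNAME and submit it to the queue
qpmsco.sh ~/jobs run0001 1 24 12 path/to/project.py swarm

# prepend NOSUB to only write the job script without submitting it
qpmsco.sh NOSUB ~/jobs run0002 1 24 12 path/to/project.py swarm
@endcode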
\note It is important that the job name be unique within a project.
Specifically, you need to *provide a new job name each time you start pmsco*, otherwise the job may fail.
It may be more natural to specify the job name on the command line using the `-j` argument
than to change the run file every time.
Unfortunately, PMSCO cannot auto-generate, auto-increment or verify the job name.
*/


@@ -105,12 +105,12 @@ is assigned to the project's cluster_generator attribute.
1. Implement a count_emitters method in your project class
if the project uses more than one emitter configuration.
It must have the same method contract as pmsco.cluster.ClusterGenerator.count_emitters.
Specifically, it must return the number of emitter configurations of a given model, scan and symmetry.
Specifically, it must return the number of emitter configurations of a given model, scan and domain.
If there is only one configuration, the method does not need to be implemented.
2. Implement a create_cluster method in your project class.
It must have the same method contract as pmsco.cluster.ClusterGenerator.create_cluster.
Specifically, it must return a cluster.Cluster object for the given model, scan, symmetry and emitter configuration.
Specifically, it must return a cluster.Cluster object for the given model, scan, domain and emitter configuration.
The emitter atoms must be marked according to the emitter configuration specified by the index argument.
Note that, depending on the index.emit argument, all emitter atoms must be marked
or only the ones of the corresponding emitter configuration.


@@ -1,32 +1,32 @@
/*! @page pag_concepts_symmetry Symmetry
/*! @page pag_concepts_domain Domain
\section sec_symmetry Symmetry and Domain Averaging
\section sec_domain Domain Averaging
A _symmetry_ under PMSCO is a discrete variant of a set of calculation parameters (including the atomic cluster)
A _domain_ under PMSCO is a discrete variant of a set of calculation parameters (including the atomic cluster)
that is derived from the same set of model parameters
and that contributes incoherently to the measured diffraction pattern.
A symmetry may be represented by a special symmetry parameter which is not subject to optimization.
A domain may be represented by special domain parameters that are not subject to optimization.
For instance, a real sample may have additional rotational domains that are not present in the cluster,
increasing the symmetry from three-fold to six-fold.
For instance, a real sample may have rotational domains that are not present in the cluster,
changing the symmetry from three-fold to six-fold.
Or, an adsorbate may be present in a number of different lateral configurations on the substrate.
In the first case, it may be sufficient to fold calculated data in the proper way to generate the same symmetry as in the measurement.
In the latter case, it may be necessary to execute a scattering calculation for each possible orientation or a representative number of possible orientations.
PMSCO provides the basic framework to spawn multiple calculations according to the number of symmetries (cf. \ref sec_tasks).
The actual data reduction from multiple symmetries to one measurement needs to be implemented on the project level.
PMSCO provides the basic framework to spawn multiple calculations according to the number of domains (cf. \ref sec_tasks).
The actual data reduction from multiple domains to one measurement needs to be implemented on the project level.
This section explains the necessary steps.
1. Your project needs to populate the pmsco.project.Project.symmetries list.
For each symmetry, add a dictionary of symmetry parameters, e.g. <code>{'angle_azi': 15.0}</code>.
There must be at least one symmetry in a project, otherwise no calculation is executed.
1. Your project needs to populate the pmsco.project.Project.domains list.
For each domain, add a dictionary of domain parameters, e.g. <code>{'angle_azi': 15.0}</code>.
At least one domain must be declared in a project, otherwise no calculation is executed.
2. The project may apply the symmetry of a task to the cluster and parameter file if necessary.
The pmsco.project.Project.create_cluster and pmsco.project.Project.create_params methods receive the index of the particular symmetry in addition to the model parameters.
3. The project combines the results of the calculations for the various symmetries into one dataset that can be compared to the measurement.
The default method implemented in pmsco.project.Project just adds up all calculations with equal weight.
If you need more control, you need to override the pmsco.project.Project.combine_symmetries method and implement your own algorithm.
2. The project may use the domain index of a task to build the cluster and parameter file as necessary.
The pmsco.project.Project.create_cluster and pmsco.project.Project.create_params methods receive the index of the particular domain in addition to the model parameters.
3. The project combines the results of the calculations for the various domains into one dataset that can be compared to the measurement.
The default method implemented in pmsco.project.Project just adds up all calculations with customizable weights.
It uses the special model parameters `wdom1`, `wdom2`, ... (default 1 if not defined) to weight each domain.
If you need more control, override the pmsco.project.Project.combine_domains method and implement your own algorithm.
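For illustration, a minimal sketch of fitting the weight of a second domain via the built-in mechanism
(assuming the `add_param(name, start, min, max, step)` helper of pmsco.project.ModelSpace; the structural parameter `dAB` is hypothetical):

@code{.py}
def create_model_space(self):
    spa = pmsco.project.ModelSpace()
    # structural parameter of the model
    spa.add_param('dAB', 2.10, 1.90, 2.30, 0.05)
    # relative weight of the second domain, varied between 0 and 2;
    # wdom1 is not declared and defaults to 1
    spa.add_param('wdom2', 1.0, 0.0, 2.0, 0.1)
    return spa
@endcode

The default combine_domains implementation then applies these weights when it sums the domain datasets.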
*/

View File

@@ -12,7 +12,7 @@ mandated by the project but also efficient calculations in a multi-process envir
A concrete set of parameters is called @ref sec_task_model.
2. The sample was measured multiple times or under different conditions (initial states, photon energy, emission angle).
Each contiguous measured dataset is called a @ref sec_task_scan.
3. The measurement averages over multiple inequivalent domains, cf. @ref sec_task_symmetry.
3. The measurement averages over multiple inequivalent domains, cf. @ref sec_task_domain.
4. The measurement includes multiple geometrically inequivalent emitters, cf. @ref sec_task_emitter.
5. The calculation should be distributed over multiple processes that run in parallel to reduce the wall time, cf. @ref sec_task_region.
@@ -24,7 +24,7 @@ as shown schematically in the following diagram.
class CalculationTask {
model
scan
symmetry
domain
emitter
region
..
@@ -55,7 +55,7 @@ class Scan {
alphas
}
class Symmetry {
class Domain {
index
..
rotation
@@ -75,13 +75,13 @@ class Region {
CalculationTask *-- Model
CalculationTask *-- Scan
CalculationTask *-- Symmetry
CalculationTask *-- Domain
CalculationTask *-- Emitter
CalculationTask *-- Region
class Project {
scans
symmetries
domains
model_handler
cluster_generator
}
@@ -98,7 +98,7 @@ class ModelHandler {
Model ..> ModelHandler
Scan ..> Project
Symmetry ..> Project
Domain ..> Project
Emitter ..> ClusterGenerator
Region ..> Project
@@ -141,29 +141,29 @@ PMSCO runs a separate calculation for each scan file and compares the combined r
This is sometimes called a _global fit_.
\subsection sec_task_symmetry Symmetry
\subsection sec_task_domain Domain
A _symmetry_ is a discrete variant of a set of calculation parameters (including the atomic cluster)
A _domain_ is a discrete variant of a set of calculation parameters (including the atomic cluster)
that is independent of the _model_ and contributes incoherently to the measured diffraction pattern.
For instance, for a system that includes two inequivalent structural domains,
two separate clusters have to be generated and calculated for each model.
The symmetry parameter is not subject to optimization.
The domain parameter is not subject to optimization.
However, if the branching ratio is unknown a priori, a model parameter can be introduced
to control the relative contribution of a particular symmetry to the diffraction pattern.
In that case, the @ref pmsco.project.Project.combine_symmetries method must be overridden.
to control the relative contribution of a particular domain to the diffraction pattern.
The basic @ref pmsco.project.Project.combine_domains method reads the special model parameters `wdom1`, `wdom2`, etc. to weight the individual domains.
A symmetry is identified by its index which is an index into the project's symmetries table (pmsco.project.Project.symmetries).
It is up to the user project to give a physical description of the symmetry, e.g. a rotation angle,
by assigning a meaningful value (e.g. a dictionary with key-value pairs) to the symmetries table.
A domain is identified by its index which is an index into the project's domains table (pmsco.project.Project.domains).
It is up to the user project to give a physical description of the domain, e.g. a rotation angle,
by assigning a meaningful value (e.g. a dictionary with key-value pairs) to the domains table.
The cluster generator can then read the value from the table rather than from constants in the code.
The figure shows two examples of symmetry parameters.
The corresponding symmetry table could be set up like this:
The figure shows two examples of domain parameters.
The corresponding domains table could be set up like this:
@code{.py}
project.add_symmetry = {'rotation': 0.0, 'registry': 0.0}
project.add_symmetry = {'rotation': 30.0, 'registry': 0.0}
project.add_domain({'rotation': 0.0, 'registry': 0.0})
project.add_domain({'rotation': 30.0, 'registry': 0.0})
@endcode
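In the cluster generator, the domain parameters of the current task can then be looked up via the task index.
A minimal sketch (assuming that the generator holds a reference to the project in `self.project`
and that the cluster's `rotate_z` method takes the azimuthal angle in degrees):

@code{.py}
def create_cluster(self, model, index):
    clu = pmsco.cluster.Cluster()
    # ... build the cluster from the model parameters ...
    # look up the parameters of the requested domain
    dom = self.project.domains[index.domain]
    # apply the domain-specific azimuthal rotation
    clu.rotate_z(dom['rotation'])
    return clu
@endcode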
@@ -173,9 +173,9 @@ The _emitter_ component of the calculation task selects a specific emitter confi
This is merely an index whose interpretation is up to the cluster generator.
The default emitter handler enumerates the emitter index from 1 to the emitter count reported by the cluster generator.
The emitter count and list of emitters may depend on model, scan and symmetry.
The emitter count and list of emitters may depend on model, scan and domain.
The cluster generator can tailor a cluster to the given model, scan, symmetry and emitter index.
The cluster generator can tailor a cluster to the given model, scan, domain and emitter index.
For example, in a large unit cell with many inequivalent emitters,
the generator might return a small sub-cluster around the actual emitter for better calculation performance
since the distant atoms of the unit cell do not contribute to the diffraction pattern.
@@ -237,20 +237,20 @@ scan
object ScanHandler
object "Sym: CalculationTask" as Sym {
object "Domain: CalculationTask" as Domain {
index = (i,j,k,-1,-1)
model
scan
symmetry
domain
}
object "SymmetryHandler" as SymHandler
object "DomainHandler" as DomainHandler
object "Emitter: CalculationTask" as Emitter {
index = (i,j,k,l,-1)
model
scan
symmetry
domain
emitter
}
@@ -260,7 +260,7 @@ object "Region: CalculationTask" as Region {
index = (i,j,k,l,m)
model
scan
symmetry
domain
emitter
region
}
@@ -270,14 +270,14 @@ object RegionHandler
Root "1" o.. "1..*" Model
Model "1" o.. "1..*" Scan
Scan "1" o.. "1..*" Sym
Sym "1" o.. "1..*" Emitter
Scan "1" o.. "1..*" Domain
Domain "1" o.. "1..*" Emitter
Emitter "1" o.. "1..*" Region
(Root, Model) .. ModelHandler
(Model, Scan) .. ScanHandler
(Scan, Sym) .. SymHandler
(Sym, Emitter) .. EmitterHandler
(Scan, Domain) .. DomainHandler
(Domain, Emitter) .. EmitterHandler
(Emitter, Region) .. RegionHandler
@enduml
@@ -293,7 +293,7 @@ and the tasks are passed back through the task handler stack.
In this phase, each level joins the datasets from the sub-tasks to the data requested by the parent task.
For example, at the lowest level, one result file is present for each region.
The region handler gathers all files that correspond to the same parent task
(i.e. have the same emitter, symmetry, scan and model attributes),
(i.e. have the same emitter, domain, scan and model attributes),
joins them to one file which includes all regions,
links the file to the parent task and passes the result to the next higher level.

View File

@@ -8,28 +8,30 @@ The code for a PMSCO job consists of the following components.
skinparam componentStyle uml2
component "project" as project
component "PMSCO" as pmsco
component "project" as project
component "scattering code\n(calculator)" as calculator
interface "command line" as cli
interface "input files" as input
interface "output files" as output
interface "experimental data" as data
interface "results" as results
interface "output files" as output
cli --> pmsco
data -> project
project ..> pmsco
pmsco ..> project
pmsco ..> calculator
cli --> project
input -> calculator
calculator -> output
pmsco -> results
@enduml
The main entry point is the _PMSCO_ module.
It implements a task loop to carry out the structural optimization
and provides an interface between calculation programs and project-specific code.
It also provides common utility classes and functions for handling project data.
The _project_ consists of program code, system and experimental parameters
The _project_ consists of program code and parameters
that are specific to a particular experiment and calculation job.
The project code reads experimental data, defines the parameter dictionary of the model,
and contains code to generate the cluster, parameter and phase files for the scattering code.
@@ -40,10 +42,6 @@ which accepts detailed input files
(parameters, atomic coordinates, emitter specification, scattering phases)
and outputs an intensity distribution of photoelectrons versus energy and/or angle.
The _PMSCO core_ interfaces between the project and the calculator.
It carries out the structural optimization and manages the calculation tasks.
It generates and sends input files to the calculator and reads back the output.
\section sec_control_flow Control flow

View File

@@ -10,7 +10,7 @@ digraph G {
create_params;
calc_modf;
calc_rfac;
comb_syms;
comb_doms;
comb_scans;
}
*/
@@ -24,11 +24,11 @@ digraph G {
model_handler -> model_creator [constraint=false, label="optimize"];
}
subgraph cluster_symmetry {
label = "symmetry handler";
subgraph cluster_domain {
label = "domain handler";
rank = same;
sym_creator [label="expand models", group=creators];
sym_handler [label="combine symmetries", group=handlers];
dom_creator [label="expand models", group=creators];
dom_handler [label="combine domains", group=handlers];
}
subgraph cluster_scan {
@@ -47,15 +47,15 @@ digraph G {
calculator [label="calculator (EDAC)", shape=box];
model_creator -> sym_creator [label="model", style=bold];
sym_creator -> scan_creator [label="models", style=bold];
model_creator -> dom_creator [label="model", style=bold];
dom_creator -> scan_creator [label="models", style=bold];
scan_creator -> calc_creator [label="models", style=bold];
calc_creator -> calculator [label="clusters,\rparameters", style=bold];
calculator -> calc_handler [label="output files", style=bold];
calc_handler -> scan_handler [label="raw data files", style=bold];
scan_handler -> sym_handler [label="combined scans", style=bold];
sym_handler -> model_handler [label="combined symmetries", style=bold];
scan_handler -> dom_handler [label="combined scans", style=bold];
dom_handler -> model_handler [label="combined domains", style=bold];
mode [shape=parallelogram];
mode -> model_creator [lhead="cluster_model"];
@@ -76,8 +76,8 @@ digraph G {
calc_rfac [shape=cds, label="R-factor function"];
calc_rfac -> model_handler [style=dashed];
comb_syms [shape=cds, label="symmetry combination rule"];
comb_syms -> sym_handler [style=dashed];
comb_doms [shape=cds, label="domain combination rule"];
comb_doms -> dom_handler [style=dashed];
comb_scans [shape=cds, label="scan combination rule"];
comb_scans -> scan_handler [style=dashed];

View File

@@ -2,52 +2,44 @@
\section sec_run Running PMSCO
To run PMSCO you need the PMSCO code and its dependencies (cf. @ref pag_install),
a code module that contains the project-specific code,
and one or several files containing the scan parameters and experimental data.
a customized code module that contains the project-specific code,
one or several files containing the scan parameters and experimental data,
and a run-file specifying the calculation parameters.
Please check the <code>projects</code> folder for examples of project modules.
For a detailed description of the command line, see @ref pag_command.
\subsection sec_run_single Single Process
Run PMSCO from the command prompt:
The following instructions assume that PMSCO was installed as a Python site-package according to @ref pag_install.
To run PMSCO from the command prompt:
@code{.sh}
cd work-dir
python pmsco-dir project-dir/project.py [pmsco-arguments] [project-arguments]
python -m pmsco -j job-name -r run-file
@endcode
where <code>work-dir</code> is the destination directory for output files,
<code>pmsco-dir</code> is the directory containing the <code>__main__.py</code> file,
<code>project.py</code> is the specific project module,
and <code>project-dir</code> is the directory where the project file is located.
PMSCO is run in one process which handles all calculations sequentially.
where:
The command line arguments are divided into common arguments interpreted by the main pmsco code (pmsco.py),
and project-specific arguments interpreted by the project module.
| Placeholder | Description |
| --- | --- |
| `work-dir` | Destination directory for output files |
| `run-file` | JSON-formatted configuration file that defines run-time parameters. The format and content of run files are described in section @ref pag_runfile. |
| `job-name` | (optional) The job name appears mainly as the prefix of all output files but is also used in the database and other places. The job name can also be declared in the run file. |
In this form, PMSCO is run in one process which handles all calculations sequentially.
Example command line for a single EDAC calculation of the two-atom project:
@code{.sh}
cd work/twoatom
python ../../pmsco ../../projects/twoatom/twoatom.py -s ea -o twoatom-demo -m single
python -m pmsco -j job0001 -r twoatom-hemi.json
@endcode
This command line executes the main pmsco module <code>pmsco.py</code>.
The main module loads the project file <code>twoatom.py</code> as a plug-in
and starts processing the common arguments.
The <code>twoatom.py</code> module contains only project-specific code
with several defined entry-points called from the main module.
The information which project to load is contained in the <code>twoatom-hemi.json</code> file,
along with all common and specific project arguments.
In the command line above, the <code>-o twoatom-demo</code> and <code>-m single</code> arguments
are interpreted by the pmsco module.
<code>-o</code> sets the base name of output files,
and <code>-m</code> selects the operation mode to a single calculation.
The scan argument is interpreted by the project module.
It refers to a dictionary entry that declares the scan file, the emitting atomic species, and the initial state.
In this example, the project looks for the <code>twoatom_energy_alpha.etpai</code> scan file in the project directory,
and calculates the modulation function for a N 1s initial state.
The kinetic energy and emission angles are contained in the scan file.
This example can be run for testing.
All necessary parameters and data files are included in the code repository.
\subsection sec_run_parallel Parallel Processes
@@ -61,30 +53,104 @@ The slave processes will run the scattering calculations, while the master coord
and optimizes the model parameters (depending on the operation mode).
For optimum performance, the number of processes should not exceed the number of available processors.
To start a two-hour optimization job with multiple processes on an quad-core workstation with hyperthreading:
To start an optimization job with multiple processes on a quad-core workstation with hyperthreading:
@code{.sh}
cd work/my_project
mpiexec -np 8 pmsco-dir/pmsco project-dir/project.py -o my_job_0001 -t 2 -m swarm
mpiexec -np 8 --use-hwthread-cpus python -m pmsco -j my_job002 -r my_project.json
@endcode
The `--use-hwthread-cpus` option is necessary on certain hyperthreading architectures.
\subsection sec_run_hpc High-Performance Cluster
The script @c bin/qpmsco.ra.sh takes care of submitting a PMSCO job to the slurm queue of the Ra cluster at PSI.
The script can be adapted to other machines running the slurm resource manager.
The script generates a job script based on @c pmsco.ra.template,
substituting the necessary environment and parameters,
and submits it to the queue.
PMSCO is ready to run with resource managers on cluster machines.
Code for submitting jobs to Slurm queues is included and can be customized for many machines.
For example, code for the slurm queue of the Ra cluster at PSI is included in the pmsco.schedule module.
Other machines can be supported by sub-classing pmsco.schedule.JobSchedule or pmsco.schedule.SlurmSchedule.
Execute @c bin/qpmsco.ra.sh without arguments to see a summary of the arguments.
To have PMSCO submit a job, the arguments for the queue are entered in the schedule section of the run file,
cf. @ref pag_runfile.
Then, the same command as for starting a calculation directly will instead submit a job to the queue:
To submit a job to the PSI clusters (see also the PEARL-Wiki page MscCalcRa),
the analogous command to the previous section would be:
@code{.sh}
bin/qpmsco.ra.sh my_job_0001 1 8 2 projects/my_project/project.py swarm
python -m pmsco -j job-name -r run-file.json
@endcode
The command creates a separate work directory with copies of the project source, the run-file and the job script.
This job directory will also receive the calculation results.
The full path of the job directory is _output-dir/job-name_.
The directory must not exist when you run the above command to prevent overwriting of previous data.
The job name can be declared in the run file or on the command line.
The command above also loads the project module and scan files.
Many parameter errors are caught this way and can be fixed before the job is submitted to the queue.
The run file offers an option to prepare a script file and not to submit the job immediately
so that you can inspect the job files and submit the job manually.
Be sure to consider the resource allocation policy of the cluster
before you decide on the number of processes.
Requesting fewer resources will prolong the run time but might increase the scheduling priority.
\subsection sec_run_dirs Directories
Code and data files are typically located in different, possibly machine-specific locations.
This can make it difficult to port a project to another machine and to repeat calculations.
Ideally, a calculation job should be repeatable on different machines
with a minimum of changes to code, input data and parameter files.
Project code (which is under version control)
should never need modifications for porting to another machine.
Run-files (which are considered part of the data) can follow a project-specific or machine-specific directory structure.
PMSCO provides directory resolution at run-time to facilitate writing of portable code.
This is done by a number of directory aliases that can be included as shell-like placeholders, e.g. `${project}`, in file paths.
Some aliases are preset to system-based defaults;
further aliases can be added by the project code or declared in the run file.
Directory aliases can be used in Project.directories
as well as in other Project attributes that hold a file name.
The table below shows the aliases defined and/or required by PMSCO.
The paths are stored in Project.directories.
The aliases are resolved before the actual calculations start (in the Project.validate() method).
The resolved paths are printed to the log at warning level.
| Key | Description | Source | Use |
| --- | --- | --- | --- |
| work | Working directory at program start | PMSCO | |
| home | User's home directory | PMSCO | |
| project | Location of the project module. | PMSCO | Can be used to find auxiliary files that are part of the repository. |
| output | Intermediate and output files. | Must be set by the project or run file | The `output_file` property which serves as the basis of all output files is a concatenation of the `output` directory and `job_name`. |
| report | Directory for graphical output (reports) | Default: `${output}/report` | |
| data (optional) | Location of data (scan) files. | Project or run file | Usage is up to the project. |
| temp | Temporary files | | Reserved. Currently not supported |
| (job tag) | Any job_tags key that maps to a legal directory name can be included in a path | run file | project or run file |
| mode, job_name, project_name | These project attributes can be included in a path if they contain a valid directory name | | |
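A minimal sketch of how a project might declare and use such aliases
(the data location and scan file are hypothetical; the `add_scan` helper is assumed to accept a file name containing aliases):

@code{.py}
class MyProject(pmsco.project.Project):
    def __init__(self):
        super().__init__()
        # machine-specific location of the experimental data
        self.directories['data'] = "${home}/experiments/my_sample"
        # aliases in file paths are resolved before the calculations start
        self.add_scan(filename="${data}/scan_hemi_450eV.etpai", emitter="N", initial_state="1s")
@endcode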
\subsection sec_run_stop Stopping a PMSCO job
A PMSCO optimization job stops on any one of the following events.
- The model handler is done.
Depending on the run mode, this happens when the optimization has converged or
the planned number of iterations or calculations has been reached.
- The number of calculation tasks exceeds the limit configured in `dispatch.MscoMaster.max_calculations`.
This is meant to prevent excessive and runaway jobs.
The default value is 1000000. It can be adjusted by the project code if necessary.
- The master process receives a SIGTERM, SIGUSR1 or SIGUSR2 from the operating system.
The signal can be sent, e.g., by the `kill` command on Linux.
This doesn't work on all platforms.
- The time limit configured in `Project.timedelta_limit` is reached.
This is a soft limit and should be set shorter than the job reservation with the resource manager.
- A file named `finish_pmsco` is present in the output directory.
This is an easy way for a user to stop a running optimization.
The file doesn't need any content.
It can be created by the `touch` command.
All these stop conditions cause graceful stops.
Running calculation tasks are waited for, but some results on the model level may not be complete.
Final reports of complete models are produced and the output folder is cleaned up.
Stops caused by resource managers such as Slurm are typically not graceful.
The results are in an undefined state, reports are not generated, and temporary files may be left over.
*/

View File

@@ -3,66 +3,66 @@
\subsection sec_general General Remarks
The PMSCO code is maintained under [Git](https://git-scm.com/).
The central repository for PSI-internal projects is at https://git.psi.ch/pearl/pmsco,
the public repository at https://gitlab.psi.ch/pearl/pmsco.
The central repository for development and PSI-internal projects is at https://gitea.psi.ch/pearl/pmsco,
the public repository at https://gitea.psi.ch/pearl-public/pmsco.
For their own developments, users should clone the repository.
Changes to common code should be submitted via pull requests.
Scientific projects should be maintained in a separate directory tree, cf. @ref sec_project.
The program code of PMSCO and its external programs is written in Python, C++ and Fortran.
The code will run in any recent Linux environment on a workstation or in a virtual machine.
Scientific Linux, CentOS7, [Ubuntu](https://www.ubuntu.com/)
and [Lubuntu](http://lubuntu.net/) (recommended for virtual machine) have been tested.
For optimization jobs, a workstation with at least 4 processor cores
For optimization jobs with parallel execution, a workstation with at least 4 processor cores
or a cluster with 20-50 available processor cores is recommended.
The program requires about 2 GB of RAM per process.
The recommended IDE is [PyCharm (community edition)](https://www.jetbrains.com/pycharm).
The documentation in [Doxygen](http://www.stack.nl/~dimitri/doxygen/index.html) format is part of the source code.
The Doxygen compiler can generate separate documentation in HTML or LaTeX.
[Spyder](https://docs.spyder-ide.org/index.html) is a good alternative with a focus on scientific data.
The documentation in [Doxygen](https://www.doxygen.nl/index.html) format is part of the source code.
The Doxygen compiler can generate documentation in HTML.
@attention Due to rapidly evolving computing environments
some of the installation instructions on this page may be outdated or incompatible with certain environments.
\subsection sec_requirements Requirements
Please note that in some environments (particularly shared high-performance machines)
it may be important to choose specific compiler and library versions.
In order to maintain backward compatibility with some of these older machines,
it may be important to choose specific compiler and library versions that are tailored to the hardware platform.
In order to maintain backward compatibility with older installations,
code that requires new versions of compilers and libraries should be introduced carefully.
The code depends on the following libraries:
The following basic tools and libraries are required:
- GCC >= 4.8
- OpenMPI >= 1.10
- F2PY
- F2C
- SWIG
- GCC (C, C++, Fortran) >= 4.8
- BLAS
- LAPACK
- Python 2.7 or 3.6
- Numpy >= 1.13
- Python packages listed in the requirements.txt file
- OpenMPI >= 1.10
- Git
Most of these requirements are available from the Linux distribution.
For an easily maintainable Python environment, [Miniconda](https://conda.io/miniconda.html) is recommended.
The Python environment distributed with the OS often contains outdated packages,
and it's difficult to switch between different Python versions.
For the Python environment,
the [uv](https://docs.astral.sh/uv/) package and environment manager is recommended.
It can be installed by non-privileged users.
Other package managers like pip and conda may work as well but are not described here.
On the PSI cluster machines, the environment must be set using the module system and conda (on Ra).
Details are explained in the PEARL Wiki.
PMSCO runs under Python 2.7 or Python 3.6.
Since Python 2 is being deprecated, Python 3.6 is recommended.
Compatibility with Python 2.7 is currently maintained by using
the [future package](http://python-future.org/compatible_idioms.html)
but may be dropped at any time.
The following tools are required to compile the documentation.
They are not needed in calculations.
- doxygen
- doxypypy
- graphviz
- Java runtime environment (JRE)
- [plantUML](https://plantuml.com)
\subsection sec_install_instructions Instructions
Installation instructions are given for Ubuntu 24.04.
On managed HPC clusters use the compilers and libraries recommended by the administrator
(often provided by a module system).
\subsubsection sec_install_ubuntu Installation on Ubuntu
The following instructions install the necessary dependencies on Ubuntu, Debian or related distributions.
The Python environment is provided by [Miniconda](https://conda.io/miniconda.html).
@code{.sh}
sudo apt update
@@ -71,8 +71,6 @@ sudo apt install \
binutils \
build-essential \
doxygen \
doxypy \
f2c \
g++ \
gcc \
gfortran \
@@ -81,99 +79,17 @@ graphviz \
libblas-dev \
liblapack-dev \
libopenmpi-dev \
make \
nano \
openmpi-bin \
openmpi-common \
python3 \
python3-venv \
sqlite3 \
wget
@endcode
On systems where the link to libblas is missing (see @ref sec_compile below),
the following lines are necessary.
@code{.sh}
cd /usr/lib
sudo ln -s /usr/lib/libblas/libblas.so.3 libblas.so
@endcode
Install Miniconda according to their [instructions](https://conda.io/docs/user-guide/install/index.html),
then configure the Python environment:
@code{.sh}
conda create -q --yes -n pmsco python=3.6
source activate pmsco
conda install -q --yes -n pmsco \
pip \
"numpy>=1.13" \
scipy \
ipython \
mpi4py \
matplotlib \
nose \
mock \
future \
statsmodels \
swig \
gitpython
pip install periodictable attrdict fasteners
@endcode
\subsubsection sec_install_singularity Installation in Singularity container
A [Singularity](https://www.sylabs.io/guides/2.5/user-guide/index.html) container
contains all OS and Python dependencies for running PMSCO.
Besides the Singularity executable, nothing else needs to be installed in the host system.
This may be the fastest way to get PMSCO running.
For installation of Singularity,
see their [user guide](https://www.sylabs.io/guides/2.5/user-guide/installation.html).
On newer Linux systems (e.g. Ubuntu 18.04), Singularity is available from the package manager.
Installation in a virtual machine on Windows or Mac are straightforward
thanks to the [Vagrant system](https://www.vagrantup.com/).
After installing Singularity,
check out PMSCO as explained in the @ref sec_compile section:
@code{.sh}
cd ~
mkdir containers
git clone git@git.psi.ch:pearl/pmsco.git pmsco
cd pmsco
git checkout master
git checkout -b my_branch
@endcode
Then, either copy a pre-built container into `~/containers`,
or build one from a script provided by the PMSCO repository:
@code{.sh}
cd ~/containers
sudo singularity build pmsco.simg ~/containers/pmsco/extras/singularity/singularity_python2
@endcode
To work with PMSCO, start an interactive shell in the container and switch to the pmsco environment.
Note that the PMSCO code is outside the container and can be edited with the usual tools.
@code{.sh}
cd ~/containers
singularity shell pmsco.simg
source activate pmsco
cd ~/containers/pmsco
make all
nosetests -w tests/
@endcode
Or call PMSCO from outside:
@code{.sh}
cd ~/containers
mkdir output
cd output
singularity run ../pmsco.simg python ~/containers/pmsco/pmsco path/to/your-project.py arg1 arg2 ...
@endcode
For parallel processing, prepend `mpirun -np X` to the singularity command as needed.
In addition, download and install [uv](https://docs.astral.sh/uv/).
PSI users should configure uv to use PSI's PyPI package cache (cf. documentation on the intranet).
\subsubsection sec_install_extra Additional Applications
@@ -189,73 +105,212 @@ gitg \
meld
@endcode
To produce documentation in PDF format (not recommended on virtual machine), install LaTeX:
To compile the documentation install the following tools.
The basic documentation is in HTML format and can be opened in any internet browser.
@code{.sh}
sudo apt-get install texlive-latex-recommended
sudo apt install \
doxygen \
graphviz \
default-jre
wget -O plantuml.jar https://sourceforge.net/projects/plantuml/files/plantuml.jar/download
sudo mkdir /opt/plantuml/
sudo mv plantuml.jar /opt/plantuml/
echo "export PLANTUML_JAR_PATH=/opt/plantuml/plantuml.jar" | sudo tee /etc/profile.d/pmsco-env.sh
@endcode
\subsection sec_compile Compilation
\subsection sec_distro Download PMSCO Source Code
Make sure you have access to the PMSCO Git repository and set up your Git environment.
Depending on your setup, location and permissions, one of the following addresses may work.
Private key authentication is usually recommended except on shared computers.
Clone or download the code from one of these repository addresses:
| Repository | Access |
| --- | --- |
| `git@git.psi.ch:pearl/pmsco.git` | PSI intranet, SSH private key authentication |
| `https://git.psi.ch/pearl/pmsco.git` | PSI intranet, password prompt |
| `git@gitlab.psi.ch:pearl/pmsco.git` | Public repository, SSH private key authentication |
| `https://gitlab.psi.ch/pearl/pmsco.git` | Public repository, password prompt |
Clone the code repository using one of these repository addresses and switch to the desired branch:
| https://gitea.psi.ch/pearl/pmsco | PSI internal |
| https://gitea.psi.ch/pearl-public/pmsco-public | Public |
@code{.sh}
git clone git@git.psi.ch:pearl/pmsco.git pmsco
cd ~
git clone {repo-address see above} pmsco
cd pmsco
git checkout master
@endcode
These instructions download the base package of PMSCO.
The public repository does not contain external programs (EDAC, PHAGEN, LOESS).
You need to obtain the source code for these programs from their respective owners,
copy them to the respective subprojects directories and
apply the patches included in the PMSCO distribution.
Please respect the respective license terms and acknowledge the use of the codes.
\subsection sec_install_environment Set up the Python Environment
The following instructions are for the [uv](https://docs.astral.sh/uv/) package manager.
For other package managers, the pyproject.toml and requirements.txt files list the necessary dependencies.
\subsubsection sec_install_uv Virtual Environment with uv
By default, uv creates the virtual environment automatically in a `.venv` folder inside the source directory tree.
In this case, no explicit setup is necessary, and pmsco can be called by:
~~~~~~{.sh}
uv run pmsco -h
~~~~~~
On some platforms, however, it may be necessary to separate the environment from the code,
e.g. because of limited storage space or quota in the home directory.
In this case, create the environment as follows:
~~~~~~{.sh}
cd ~
mkdir envs
cd envs
uv venv --clear my_pmsco_env
~~~~~~
The `--clear` option resets an existing environment to empty.
To activate this environment, call this command once in every terminal:
~~~~~~{.sh}
source ~/envs/my_pmsco_env/bin/activate
~~~~~~
\subsubsection sec_normal_install Installing PMSCO
To install PMSCO and all dependencies into the active environment,
run the following commands in the top-level PMSCO directory (where `pyproject.toml` is located).
The commands compile the Fortran and C++ code of the calculation programs using the
[Meson build system](https://mesonbuild.com/meson-python/index.html)
and install the binaries and Python code in the site-packages folder of the active Python environment.
~~~~~~{.sh}
uv sync --active
~~~~~~
To use the default `.venv` environment, omit the `--active` option (also in the uv commands shown further below).
Now, run the unit tests to check the installation:
~~~~~~{.sh}
uv run --active nosetests
~~~~~~
And check the help page:
~~~~~~{.sh}
uv run --active pmsco -h
~~~~~~
In the explicit environment, these commands can alternatively be called directly:
~~~~~~{.sh}
nosetests
pmsco -h
~~~~~~
The PMSCO packages are now accessible in Python import statements.
Verify it by opening a Python shell and entering:
~~~~~~{.py}
import pmsco.project
dir(pmsco.project)
~~~~~~
Note: By default, uv installs the Python code in editable mode.
Changes in the PMSCO source tree are visible as soon as you start a new Python interpreter.
This does not apply to the subpackages, however.
After modifying the subpackages, you need to clear and re-sync the environment.
\subsection sec_test Test project
Run the twoatom project to check that everything is installed correctly:
~~~~~~{.sh}
cd ~
mkdir -p work/twoatom
cd work/twoatom
nice python -m pmsco -r {path-to-pmsco}/projects/twoatom/twoatom-hemi.json
~~~~~~
You should get a number of result files whose names start with `twoatom0001` in `~/work/twoatom/`,
including a hologram plot of the modulation function.
To learn more about running PMSCO, see @ref pag_run.
\subsection sec_install_projects Installing Namespace Packages
Instructions on how to set up your own projects as namespace packages are given in section \ref sec_project.
To install them into the pmsco namespace, call uv with the `--inexact` option.
Without `--inexact`, uv would remove the previously installed packages (including PMSCO).
~~~~~~{.sh}
uv sync --active --inexact
~~~~~~
\subsection sec_install_singularity Installation in a Singularity container
Singularity containers are currently unmaintained.
The PMSCO source includes an install script for the [Singularity](https://sylabs.io/singularity/) container system
under `extras/singularity`.
To get started with Singularity,
download it from [sylabs.io](https://www.sylabs.io/singularity/) and install it according to their instructions.
On Windows, Singularity can be installed in a virtual machine using the [Vagrant](https://www.vagrantup.com/)
script included under `extras/vagrant`.
After installing Singularity,
check out PMSCO as explained in the @ref sec_compile section:
@code{.sh}
cd ~
mkdir containers
cd containers
git clone git@gitea.psi.ch:pearl-public/pmsco-public.git pmsco
cd pmsco
git checkout master
git checkout -b my_branch
@endcode
Compile the code and run the unit tests to check that it worked.
Then, either copy a pre-built container into `~/containers`,
or build one from the definition file included under extras/singularity.
You may need to customize the definition file to match the host OS
or to install compatible OpenMPI libraries,
cf. [Singularity user guide](https://sylabs.io/guides/3.7/user-guide/mpi.html).
@code{.sh}
make all
nosetests -w tests/
cd ~/containers
sudo singularity build pmsco.sif ~/containers/pmsco/extras/singularity/singularity_python3
@endcode
If the compilation of _loess.so fails due to a missing BLAS library,
try to set a link to the BLAS library as follows (the actual file names may vary depending on the distribution or version):
@code{.sh}
cd /usr/lib
sudo ln -s /usr/lib/libblas/libblas.so.3 libblas.so
@endcode
\subsection sec_test Tests
Run the unit tests.
They should pass successfully.
Re-check from time to time.
To work with PMSCO, start an interactive shell in the container and switch to the pmsco environment.
Note that the PMSCO code is outside the container and can be edited with the usual tools.
@code{.sh}
cd ~/pmsco
nosetests -w tests/
cd ~/containers
singularity shell pmsco.sif
. /opt/miniconda/etc/profile.d/conda.sh
conda activate pmsco
cd ~/containers/pmsco
meson setup build
meson compile -C build
meson install -C build
meson test -C build
@endcode
Run the twoatom project to check the compilation of the calculation programs.
Or call PMSCO from outside:
@code{.sh}
cd ~/pmsco
mkdir work
cd work
mkdir twoatom
cd twoatom/
nice python ~/pmsco/pmsco ~/pmsco/projects/twoatom/twoatom.py -s ea -o twoatom_energy_alpha -m single
cd ~/containers
mkdir output
cd output
singularity run -e ../pmsco.sif python -m pmsco -r path/to/your-runfile
@endcode
Runtime warnings may appear because the twoatom project does not contain experimental data.
For parallel processing, prepend `mpirun -np X` to the singularity command as needed.
Note that this requires compatible OpenMPI versions on the host and container to avoid runtime errors.
To learn more about running PMSCO, see @ref pag_run.
*/

View File

@@ -1,9 +1,11 @@
/*! @mainpage Introduction
\section sec_intro Introduction
PMSCO stands for PEARL multiple-scattering cluster calculations and structural optimization.
It is a collection of computer programs to calculate photoelectron diffraction patterns,
and to optimize structural models based on measured data.
PMSCO (PSI multiple-scattering cluster calculations and structural optimization)
is a Python-based workflow engine to calculate photoelectron diffraction patterns,
and to optimize structural models based on measured data using machine learning techniques.
PMSCO was developed at the [Paul Scherrer Institut (PSI)](https://www.psi.ch/)
by the team of the [PEARL beamline](https://www.psi.ch/en/sls/pearl).
The actual scattering calculation is done by code developed by other parties.
While the scattering program typically calculates a diffraction pattern based on a set of static parameters and a specific coordinate file in a single process,
@@ -12,7 +14,7 @@ PMSCO wraps around that program to facilitate parameter handling, cluster buildi
In the current version, PMSCO can make use of the following programs.
Other programs may be integrated as well.
- [EDAC](http://garciadeabajos-group.icfo.es/widgets/edac/)
- [EDAC](https://garciadeabajos-group.icfo.es/widgets/edac/)
by F. J. García de Abajo, M. A. Van Hove, and C. S. Fadley,
[Phys. Rev. B 63 (2001) 075404](http://dx.doi.org/10.1103/PhysRevB.63.075404)
- PHAGEN from the [MsSpec package](https://ipr.univ-rennes1.fr/msspec)
@@ -24,28 +26,28 @@ Other programs may be integrated as well.
- angle or energy scanned XPD.
- various scanning modes including energy, polar angle, azimuthal angle, analyser angle.
- averaging over multiple symmetries (domains or emitters).
- averaging over multiple domains and emitters.
- global optimization of multiple scans.
- structural optimization algorithms: particle swarm optimization, grid search, gradient search.
- structural optimization algorithms: genetic, particle swarm, grid search.
- calculation of the modulation function.
- calculation of the weighted R-factor.
- automatic parallel processing using OpenMPI.
- integrated and extensible reporting, database storage of results.
- automatic parallel processing using OpenMPI and job submission to scheduling systems.
\section sec_project Optimization Projects
\section sec_intro_project Optimization Projects
To set up a new optimization project, you need to:
- create a new directory under projects.
- create a new Python module in this directory, e.g., my_project.py.
- implement a sub-class of project.Project in my_project.py.
- override the create_cluster, create_params, and create_domain methods.
- optionally, override the combine_symmetries and combine_scans methods.
- add a global function create_project to my_project.py.
- provide experimental data files (intensity or modulation function).
- implement a sub-class of pmsco.project.Project in my_project.py.
- override the create_cluster, create_params, and create_model_space methods.
- optionally, override the combine_domains and combine_scans methods.
- add a global function create_project to my_project.py or create a @ref pag_runfile.
- prepare experimental data files (intensity or modulation function).
For details, see the documentation of the Project class,
and the example projects.
For details, see @ref pag_project, the documentation of the pmsco.project.Project class and the example projects.
\section sec_intro_start Getting Started
@@ -54,22 +56,24 @@ and the example projects.
- @ref pag_concepts_tasks
- @ref pag_concepts_emitter
- @ref pag_install
- @ref pag_project
- @ref pag_run
- @ref pag_command
- @ref pag_opt
\section sec_license License Information
An open distribution of PMSCO is available under the [Apache License, Version 2.0](http://www.apache.org/licenses/LICENSE-2.0) at <https://gitlab.psi.ch/pearl-public/pmsco>.
The source code of PMSCO is licensed under the [Apache License, Version 2.0](http://www.apache.org/licenses/LICENSE-2.0).
This _does not include_ the calculation packages contained in the subprojects folder which are licensed separately.
- Please read and respect the respective license agreements.
- Please acknowledge the use of the code.
- Please share your development of the code with the original author.
- Please consider sharing your developments with the original author.
Due to different copyright terms, the third-party calculation programs are not contained in the public software repository.
These programs may not be used without an explicit agreement by the respective original authors.
\author Matthias Muntwiler, <mailto:matthias.muntwiler@psi.ch>
\version This documentation is compiled from version $(REVISION).
\copyright 2015-2019 by [Paul Scherrer Institut](http://www.psi.ch)
\copyright 2015-2025 by [Paul Scherrer Institut](http://www.psi.ch)
\copyright Licensed under the [Apache License, Version 2.0](http://www.apache.org/licenses/LICENSE-2.0)
*/

View File

@@ -3,28 +3,34 @@
\subsection sec_opt_swarm Particle swarm
\subsection sec_opt_swarm Particle swarm optimization (PSO)
The particle swarm algorithm is adapted from
The particle swarm optimization (PSO) algorithm seeks to find a global optimum in a multi-dimensional model space
by employing the _swarm intelligence_ of a number of particles traversing space,
each at its own velocity and direction,
but adjusting its trajectory based on its own experience and the results of its peers.
The PSO algorithm is adapted from
D. A. Duncan et al., Surface Science 606, 278 (2012).
It is implemented in the @ref pmsco.optimizers.swarm module.
The general parameters of the genetic algorithm are specified in the @ref Project.optimizer_params dictionary.
The general parameters of the algorithm are specified in the @ref Project.optimizer_params dictionary.
Some of them can be changed on the command line.
| Parameter | Command line | Range | Description |
| --- | --- | --- | --- |
| pop_size | --pop-size | &ge; 1 | |
| pop_size | --pop-size | &ge; 1 | Recommended 20..50 |
| position_constrain_mode | | default bounce | Resolution of domain limit violations. |
| seed_file | --seed-file | a file path, default none | |
| seed_limit | --seed-limit | 0..pop_size | |
| rfac_limit | | 0..1, default 0.8 | Accept only seed values that have a lower R-factor. |
| recalc_seed | | True or False, default True | |
The domain parameters have the following meanings:
The model space attributes have the following meaning:
| Parameter | Description |
| --- | --- |
| start | Seed model. The start values are copied into particle 0 of the initial population. |
| start | Start value of particle 0 in first iteration. |
| min | Lower limit of the parameter range. |
| max | Upper limit of the parameter range. |
| step | Not used. |
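For illustration, the corresponding project code might look like the following sketch
(assuming the `add_param(name, start, min, max, step)` helper of pmsco.project.ModelSpace; the parameter names are hypothetical):

@code{.py}
class MyProject(pmsco.project.Project):
    def __init__(self):
        super().__init__()
        # swarm size in the recommended range
        self.optimizer_params['pop_size'] = 30

    def create_model_space(self):
        spa = pmsco.project.ModelSpace()
        # add_param(name, start, min, max, step); step is not used by the swarm
        spa.add_param('dAB', 2.10, 1.90, 2.30, 0.05)
        spa.add_param('th', 15.0, 0.0, 30.0, 1.0)
        return spa
@endcode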
@@ -32,23 +38,23 @@ The domain parameters have the following meanings:
\subsubsection sec_opt_seed Seeding a population
By default, one particle is initialized with the start value declared in the parameter domain,
and the other are set to random values within the domain.
By default, one particle is initialized with the start value declared with the model space,
and the other ones are initialized at random positions in the model space.
You may initialize more particles of the population with specific values by providing a seed file.
The seed file must have a similar format to the result `.dat` files
with a header line specifying the column names and data rows containing the values for each particle.
A good practice is to use a previous `.dat` file and remove unwanted rows.
To continue an interrupted optimization,
the `.dat` file from the previous optimization can be used as is.
The `.dat` file from a previous optimization job can be used as is to continue the optimization,
also in a different optimization mode.
The seeding procedure can be tweaked by several optimizer parameters (see above).
PMSCO normally loads the first rows up to population size - 1 or up to the `seed_limit` parameter,
whichever is lower.
If an `_rfac` column is present, the file is first sorted by R-factor and only the best models are loaded.
Models that resulted in an R-factor above the `rfac_limit` parameter are always ignored.
Models that resulted in an R-factor above the `rfac_limit` parameter are ignored in any case.
During the optimization process, all models loaded from the seed file are normally re-calculated.
In the first iteration of the optimization run, the models loaded from the seed file are re-calculated by default.
This may waste CPU time if the calculation is run under the same conditions
and would result in exactly the same R-factor,
as is the case if the seed is used to continue a previous optimization, for example.
@@ -58,25 +64,26 @@ and PMSCO will use the R-factor value from the seed file rather than calculating
\subsubsection sec_opt_patch Patching a running optimization
While an optimization process is running, the user can manually patch the population with arbitrary values,
While an optimization job is running, the user can manually patch the population with arbitrary values,
for instance, to kick the population out of a local optimum or to drive it to a less sampled parameter region.
To patch a running population, prepare a population file named `pmsco_patch.pop` and copy it to the work directory.
The file must have a similar format as the result `.dat` files
The patch file must have the same format as the result `.dat` files
with a header line specifying the column names and data rows containing the values.
It should contain as many rows as particles to be patched but not more than the size of the population.
The columns must include a `_particle` column which specifies the particle to patch
as well as the model parameters to be changed.
The columns must include a `_particle` column and the model parameters to be changed.
The `_particle` column specifies the index of the particle that is patched (ranging from 0 to population size - 1).
Parameters that should remain unaffected can be left out;
extra columns including `_gen`, `_rfac` etc. are ignored.
PMSCO checks the file for syntax errors and ignores it if errors are present.
Parameter values that lie outside the domain boundary are ignored.
Individual parameter values that lie outside the domain boundary are silently ignored.
Successful or failed patching is logged at warning level.
The patch file is re-applied whenever its time stamp has changed.
PMSCO keeps track of the time stamp of the file and re-applies the patch whenever the time stamp has changed.
\attention Do not edit the patch file in the working directory
to prevent it from being read in an unfinished state or multiple times.
\attention Since each change of time stamp may trigger patching,
do not edit the patch file in the working directory
to prevent it from being read in an unfinished state or multiple times!
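For example, the patch file could be prepared by a small script like the following sketch
(assuming whitespace-separated columns as in the result `.dat` files; the model parameters `dAB` and `th` are hypothetical):

@code{.py}
import os

# write the patch to a temporary name first ...
with open("pmsco_patch.pop.tmp", "w") as f:
    f.write("_particle dAB th\n")   # header: particle index and parameters to patch
    f.write("3 2.05 12.0\n")        # move particle 3 to a new position
    f.write("7 2.25 18.0\n")        # move particle 7 to a new position
# ... then rename atomically so that PMSCO never reads a half-written file
os.replace("pmsco_patch.pop.tmp", "pmsco_patch.pop")
@endcode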
\subsection sec_opt_genetic Genetic optimization
@@ -103,7 +110,7 @@ Some of them can be changed on the command line.
| Parameter | Command line | Range | Description |
| --- | --- | --- | --- |
| pop_size | --pop-size | &ge; 1 | |
| pop_size | --pop-size | &ge; 1 | Recommended 10..40 |
| mating_factor | | 1..pop_size, default 4 | |
| strong_mutation_probability | | 0..1, default 0.01 | Probability that a parameter undergoes a strong mutation. |
| weak_mutation_probability | | 0..1, default 1 | Probability that a parameter undergoes a weak mutation. This parameter should be left at 1. Lower values tend to produce discrete parameter values. Weak mutations can be tuned by the step domain parameters. |
@@ -113,7 +120,7 @@ Some of them can be changed on the command line.
| rfac_limit | | 0..1, default 0.8 | Accept only seed values that have a lower R-factor. |
| recalc_seed | | True or False, default True | |
The domain parameters have the following meanings:
The model space attributes have the following meaning:
| Parameter | Description |
| --- | --- |
@@ -129,7 +136,11 @@ cf. sections @ref sec_opt_seed and @ref sec_opt_swarm.
\subsection sec_opt_grid Grid search
The grid search algorithm samples the parameter space at equidistant steps.
The order of calculations is randomized so that distant parts of the parameter space are sampled at an early stage.
It is implemented in the @ref pmsco.optimizers.grid module.
The model space attributes have the following meaning.
The order of calculations is random so that results from different parts of the model space become available early.
| Parameter | Description |
| --- | --- |
@@ -149,15 +160,19 @@ The table scan calculates models from an explicit table of model parameters.
It can be used to recalculate models from a previous optimization run on other experimental data,
as an interface to external optimizers,
or as a simple input of manually edited model parameters.
It is implemented in the @ref pmsco.optimizers.table module.
The table can be stored in an external file that is specified on the command line,
or supplied in one of several forms by the custom project class.
The table can be left unchanged during the calculations,
or new models can be added on the go.
Duplicate models are ignored.
@attention Because it is not easily possible to know when and which models have been read from the table file, if you do modify the table file during processing, pay attention to the following hints:
1. The file on disk must not be locked for more than a second. Do not keep the file open unnecessarily.
2. _Append_ new models to the end of the table rather than overwriting previous ones. Otherwise, some models may be lost before they have been calculated.
@attention Because it is not easily possible to know when the table file is read,
if you do modify the table file while calculations are running, observe the following rules:
1. Do not keep the file locked for longer than a second.
2. Append new models to the end of the table rather than overwriting previous ones.
3. Delete lines only if you're sure that they are not needed any more.
The general parameters of the table scan are specified in the @ref Project.optimizer_params dictionary.
Some of them can be changed on the command line or in the project class (depending on how the project class is implemented).
@@ -167,7 +182,7 @@ Some of them can be changed on the command line or in the project class (dependi
| pop_size | --pop-size | &ge; 1 | Number of models in a generation (calculated in parallel). In table mode, this parameter is not so important and can be left at the default. It has nothing to do with table size. |
| table_file | --table-file | a file path, default none | |
The domain parameters have the following meanings.
The model space attributes have the following meaning.
Models that violate the parameter range are not calculated.
| Parameter | Description |

514
docs/src/project.dox Normal file
View File

@@ -0,0 +1,514 @@
/*! @page pag_project Setting up a new project
\section sec_project Setting up a new project
This topic guides you through the setup of a new project.
Be sure to check out the examples in the projects folder
and the code documentation as well.
The basic steps are:
1. Create a new package folder under `pmsco/projects`.
To keep your code and PMSCO separate, it is recommended to start your own pmsco/projects tree
in a convenient location separate from the PMSCO source code.
2. Add the parent directory of your pmsco/projects tree to the Python path.
3. In the new folder, create a Python module for the project (subsequently called _the project module_).
4. In the project module, define a cluster generator class which inherits from @ref pmsco.cluster.ClusterGenerator.
5. In the project module, define a project class which inherits from @ref pmsco.project.Project.
6. Create one or more run files.
The basic steps listed above are recommended and explained in the following.
In previous versions, other mechanisms of project invocation were available.
They are now obsolete.
\subsection sec_packages Namespace packages
[Python namespace packages](https://realpython.com/python-namespace-package/) provide an easy way
to inject project modules into the PMSCO namespace
while their source files are kept separate from the core PMSCO packages.
This way, PMSCO and the project modules can be under separate version control.
Namespace packages work by extending the Python module search path.
The module loader looks for packages in every entry of the search path
and does not stop at the first match as it would do for a regular package.
The recommended folder structure is:
~~~~~~
pmsco-projects/
+-- pyproject.toml
+-- pmsco/
    +-- projects/
        +-- project1/
        |   +-- project1.py
        |   +-- run1.json
        |   +-- ...
        +-- project2/
            +-- ...
~~~~~~
In place of `pmsco-projects`, `project1`, `project2`, `run1`, you should use distinct names.
The two levels `pmsco` and `projects` should be left as is.
If you now include `pmsco-projects` in the Python path,
all of your projects become available within the `pmsco` namespace, i.e.,
you can `import pmsco.projects.project1.project1` in Python.
Furthermore, you can call the module in a run-file without specifying a file path.
You may install multiple project packages if needed.
The recommended way to add `pmsco-projects` to the Python path is by an editable installation.
This will allow you to keep editing your project sources in place.
1. Place your project files in a directory tree similar to `pmsco-projects/pmsco/projects/project1/`.
The `pmsco/projects` level is mandatory as a part of the path.
Replace `pmsco-projects` and `project1` by your own choice.
2. Be sure not to create any `__init__.py` files in this directory tree.
3. Copy the `pyproject.toml` file from the PMSCO source into your `pmsco-projects` and adjust its contents.
At least give the package a distinct name.
4. Select another build backend if necessary.
The default [uv_build](https://docs.astral.sh/uv/concepts/build-backend/) is recommended for pure Python projects.
5. 'Install' the project locally.
With uv, call `uv sync --active --inexact`
while you are in the directory that contains the `pyproject.toml` file.
In plain pip the corresponding command would be
`pip install --editable .`.
6. Check that you can `import pmsco.projects.project1.project1` (or whatever your project is called) in a Python shell.
If you encounter problems importing the pmsco modules, check the Python path in a Python shell.
It must contain the `site-packages` directory of your Python environment.
Make sure it does not contain any pmsco or project source directory explicitly.
Also make sure that you don't have any `__init__.py` files in your project tree,
and do not use explicit paths to pmsco or your project anywhere in your source code or shell configuration files.
Be careful not to install packages multiple times in different locations.
In case of trouble, set up a fresh environment.
\subsection sec_project_module Project module
A skeleton of the project module file (with some common imports) may look like this:
~~~~~~{.py}
import logging
import math
import numpy as np
import periodictable as pt
from pathlib import Path
import pmsco.cluster
import pmsco.data
import pmsco.dispatch
import pmsco.elements.bindingenergy
import pmsco.project
logger = logging.getLogger(__name__)
class MyClusterGenerator(pmsco.cluster.ClusterGenerator):
    def create_cluster(self, model, index):
        clu = pmsco.cluster.Cluster()
        # ...
        return clu

    def count_emitters(self, model, index):
        # ...
        return 1

class MyProject(pmsco.project.Project):
    def __init__(self):
        super().__init__()
        # ...
        self.cluster_generator = MyClusterGenerator(self)

    def create_model_space(self):
        spa = pmsco.project.ModelSpace()
        # ...
        return spa

    def create_params(self, model, index):
        par = pmsco.project.CalculatorParams()
        # ...
        return par
~~~~~~
The main purpose of the `MyProject` class is to bundle the project-specific calculation parameters and code.
The purpose of the `MyClusterGenerator` class is to produce atomic clusters as a function of a number of model parameters.
For the project to be useful, some of the methods in the skeleton above need to be implemented.
The individual methods are discussed in the following.
Further descriptions can be found in the documentation of the code.
\subsection sec_project_cluster Cluster generator
The cluster generator is a project-specific Python object that produces a cluster, i.e., a list of atomic coordinates,
based on a small number of model parameters whenever PMSCO requires it.
The most important method of a cluster generator is `create_cluster`.
At least this method must be implemented for a functional cluster generator.
A generic `count_emitters` method is implemented in the base class.
It needs to be overridden if inequivalent emitters should be calculated in parallel.
\subsubsection sec_project_cluster_create Cluster definition
The `create_cluster` method takes the model parameters (a dictionary)
and the task index (a pmsco.dispatch.CalcID, cf. @ref pag_concepts_tasks) as arguments.
Given these arguments, it creates and fills a @ref pmsco.cluster.Cluster object.
See @ref pmsco.cluster.ClusterGenerator.create_cluster for details on the method contract.
As an example, have a look at the following simplified excerpt from the `twoatom` demo project.
~~~~~~{.py}
class TwoatomCluster(ClusterGenerator):
    # ...

    def create_cluster(self, model, index):
        # access model parameters
        # dAB - distance between atoms in Angstroms
        # th - polar angle in degrees
        # ph - azimuthal angle in degrees
        r = model['dAB']
        th = math.radians(model['th'])
        ph = math.radians(model['ph'])

        # prepare a cluster object
        clu = pmsco.cluster.Cluster()
        # the comment line is optional but can be useful
        clu.comment = "{0} {1}".format(self.__class__, index)
        # set the maximum radius of the cluster (outliers will be ignored)
        clu.set_rmax(r * 2.0)

        # calculate atomic vectors
        dx = r * math.sin(th) * math.cos(ph)
        dy = r * math.sin(th) * math.sin(ph)
        dz = r * math.cos(th)
        a_top = np.array((0.0, 0.0, 0.0))
        a_bot = np.array((-dx, -dy, -dz))

        # add an oxygen atom at a_top position and mark it as emitter
        clu.add_atom('O', a_top, 1)
        # add a copper atom at a_bot position
        clu.add_atom('Cu', a_bot, 0)

        # pass the created cluster to the calculator
        return clu
~~~~~~
In this example, two atoms are added to the cluster.
The @ref pmsco.cluster.Cluster class provides several methods to simplify the task,
such as adding layers or bulk regions, rotation, translation, trim, emitter selection, etc.
Please refer to the documentation of its code for details.
It may also be instructive to have a look at the demo projects.
The main purposes of the cluster object are to store an array of atoms and to read/write cluster files in a variety of formats.
For each atom, the following properties are stored:
- sequential atom index (1-based, maintained by cluster code)
- atom type (chemical element number)
- chemical element symbol from periodic table
- x coordinate of the atom position
- y coordinate of the atom position
- z coordinate of the atom position
- emitter flag (0 = scatterer, 1 = emitter, default 0)
- charge/ionicity (units of elementary charge, default 0)
- scatterer class (default 0)
All of these properties except the scatterer class can be set by the `add_xxxx` methods of the cluster.
The scatterer class is used internally by the atomic scattering factor calculators.
Whether the charge/ionicity is used depends on the particular calculator; EDAC, for instance, does not use it.
\note You do not need to worry about how many emitters a calculator allows,
or whether the emitter needs to be at the origin or in the first place of the array.
These technical aspects are handled by PMSCO code transparently.
\subsubsection sec_project_cluster_domains Domains
Domains refer to regions of inequivalent structure in the probing region.
This may include regions of different orientation, different lattice constant, or even different structure.
The cluster methods read the requested domain from the `index.domain` argument.
This is an index into the @ref pmsco.project.Project.domains list where each item is a dictionary
that holds additional, invariable structural parameters.
A common case are rotational domains.
In this case, the list of domains may look like `[{"zrot": 0.0}, {"zrot": 60.0}]`, for example,
and the `create_cluster` method would include additional code to rotate the cluster:
~~~~~~{.py}
def create_cluster(self, model, index):
    # filling atoms here
    # ...

    dom = self.domains[index.domain]
    try:
        z_rot = dom['zrot']
    except KeyError:
        z_rot = 0.0
    if z_rot:
        clu.rotate_z(z_rot)

    # selecting emitters
    # ...

    return clu
~~~~~~
Depending on the complexity of the system, it is advisable to split the code into a separate method for each domain.
The @ref pmsco.project.Project class includes generic code to add intensities of domains incoherently
(cf. @ref pmsco.project.Project.combine_domains).
In this case, the model space should contain parameters 'wdom0', 'wdom1', etc.,
that define the weights of domain 0, 1, etc.
To avoid correlations between parameters, one domain must have a fixed weight;
typically, 'wdom0' is left undefined and defaults to 1.
\subsubsection sec_project_cluster_emitters Emitter configurations
If a project uses a large cluster and/or many emitters,
it may be more efficient to generate emitter-specific cluster configurations,
for instance to leverage process parallelization,
or to produce small, local clusters around the emitter site.
This concept is called _emitter configurations_ and explained in detail in @ref pag_concepts_emitter.
To implement emitter configurations, override the `count_emitters` method to return the number of emitter configurations.
In the simplest case, this is the number of inequivalent emitters:
~~~~~~{.py}
def count_emitters(self, model, index):
    index = index._replace(emit=-1)
    clu = self.create_cluster(model, index)
    return clu.get_emitter_count()
~~~~~~
Next, modify the `create_cluster` method to check the emitter index (`index.emit`).
If it is -1, the method must return the full cluster with all inequivalent emitters marked.
If it is zero or positive, only the corresponding emitter configuration must be marked.
For example, if each emitting atom represents a separate emitter configuration:
~~~~~~{.py}
def create_cluster(self, model, index):
    # filling atoms here
    # ...

    # select all possible emitters (atoms of a specific element) in a cylindrical volume
    # idx_emit is an array of atom numbers (0-based atom index)
    idx_emit = clu.find_index_cylinder(origin, r_xy, r_z, self.project.scans[index.scan].emitter)

    # if PMSCO asks for a specific emitter, restrict the array index:
    if index.emit >= 0:
        idx_emit = idx_emit[index.emit]

    # mark the selected emitters
    clu.data['e'][idx_emit] = 1

    return clu
~~~~~~
Now, the individual emitter configurations are calculated in separate tasks
which can run in parallel in a multi-process environment.
Note that the processing time of EDAC scales linearly with the number of emitters.
\subsection sec_project_project Project class
Most commonly, a project class overrides the `__init__`, `create_model_space` and `create_params` methods.
Most other inherited methods can be overridden optionally,
for instance `validate`, `setup`, `calc_modulation`, `rfactor`,
as well as the combine methods `combine_rfactors`, `combine_domains`, `combine_emitters`, etc.
This introduction focuses on the three most important methods.
\subsubsection sec_project_project_init Initialization and defaults
The `__init__` method defines and initializes project properties with default values.
It may also redefine properties of the base class.
The following code is just an example to give some ideas.
~~~~~~{.py}
class MyProject(pmsco.project.Project):
    def __init__(self):
        # call the inherited method first
        super().__init__()

        # re-define an inherited property
        self.directories["data"] = Path("/home/pmsco/data")

        # define a scan dictionary
        self.scan_dict = {}
        # fill the scan dictionary
        self.build_scan_dict()

        # create the cluster generator
        self.cluster_generator = MyClusterGenerator(self)

        # declare the list of domains (at least one is required)
        self.domains = [{"zrot": 0.}]

    def build_scan_dict(self):
        self.scan_dict["empty"] = {"filename": "${pmsco}/projects/common/empty-hemiscan.etpi",
                                   "emitter": "Si", "initial_state": "2p3/2"}
        self.scan_dict["Si2p"] = {"filename": "${data}/xpd-Si2p.etpis",
                                  "emitter": "Si", "initial_state": "2p3/2"}
~~~~~~
A scan dictionary is one way to specify locations and metadata of experimental files centrally in the project code.
The scan can then be selected by the dictionary key rather than copying file locations.
Note that all public attributes can be assigned from a run file.
This happens after the `__init__` method.
The values set by `__init__` serve as default values.
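For example, the single default domain declared in the constructor above could be overridden from a run-file
as follows (a minimal sketch, see @ref pag_runfile for the file format):
~~~~~~{.py}
{
    "project": {
        // ...
        "domains": [{"zrot": 0.0}, {"zrot": 60.0}]
    }
}
~~~~~~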
\subsubsection sec_project_project_space Model space
The model space defines the keys and value ranges of the model parameters.
There are three ways to declare the model space in order of priority:
1. Declare the model space in the run-file.
2. Assign a ModelSpace to the self.model_space property directly in the `__init__` method.
3. Implement the `create_model_space` method.
The third way may look like this:
~~~~~~{.py}
class MyProject(pmsco.project.Project):
    def create_model_space(self):
        # create an empty model space
        spa = pmsco.project.ModelSpace()

        # add parameters
        spa.add_param('dAB', 2.05, width=0.25, step=0.05)
        spa.add_param('th', 15.00, 0.00, 30.00, 1.00)
        spa.add_param('ph', 90.00)
        spa.add_param('V0', 21.96, width=10.0, step=1.0)
        spa.add_param('Zsurf', 1.50)
        spa.add_param('wdom1', 0.5, 0.10, 10.00, 0.10)

        # return the model space
        return spa
~~~~~~
This code declares six model parameters: `dAB`, `th`, `ph`, `V0`, `Zsurf` and `wdom1`.
Three of them are structural parameters (used by the cluster generator above),
two are used by the `create_params` method (see below),
and `wdom1` is used in @ref pmsco.project.Project.combine_domains
while summing up contributions from different domains.
The values in the arguments list correspond to the start value (initial guess),
the lower and upper boundaries of the value range,
and the step size for optimizers that require it.
If just one value is given, the parameter is held constant during the optimization.
The range can, alternatively, be specified by the `width` argument.
A similar declaration in a run-file could look as follows (some parameters omitted for brevity).
Parameter values can be numeric constants,
or simple Python math expressions in double quotes.
~~~~~~{.py}
{
    "project": {
        // ...
        "model_space": {
            "dAB": {
                "start": "2.0 / math.cos(math.radians(15.0))",
                "width": 0.25,
                "step": 0.05
            },
            "th": {
                "start": 15.0,
                "min": 0.0,
                "max": 30.0,
                "step": 1.0
            },
            "Zsurf": {
                "start": 1.50
            }
            // ...
        }
    }
}
~~~~~~
\subsubsection sec_project_project_params Calculation parameters
Non-structural parameters that are needed for the input files of the calculators are passed
in a @ref pmsco.project.CalculatorParams object.
This object is created and filled in the `create_params` method of the project class.
The following example is from the `twoatom` demo project:
~~~~~~{.py}
class MyProject(pmsco.project.Project):
    def create_params(self, model, index):
        params = pmsco.project.CalculatorParams()

        # meta data
        params.title = "two-atom demo"
        params.comment = "{0} {1}".format(self.__class__, index)

        # initial state and binding energy
        initial_state = self.scans[index.scan].initial_state
        params.initial_state = initial_state
        emitter = self.scans[index.scan].emitter
        params.binding_energy = pt.elements.symbol(emitter).binding_energy[initial_state]

        # experimental setup
        params.polarization = "H"
        params.polar_incidence_angle = 60.0
        params.azimuthal_incidence_angle = 0.0
        params.experiment_temperature = 300.0

        # material parameters
        params.z_surface = model['Zsurf']
        params.work_function = 4.5
        params.inner_potential = model['V0']
        params.debye_temperature = 356.0

        # multiple-scattering parameters (EDAC)
        params.emitters = []
        params.lmax = 15
        params.dmax = 5.0
        params.orders = [25]

        return params
~~~~~~
Most of the code is generic and can be copied to other projects.
Only the experimental and material parameters need to be adjusted.
Other properties can be changed as needed, see @ref pmsco.project.CalculatorParams.
\subsection sec_project_args Passing run-time parameters
The recommended way of passing calculation parameters is via @ref pag_runfile.
Run-files allow for a complete separation of code and data in a generic and flexible way.
Program code can be managed by a version control system,
and run-files can be stored along with the results.
This simplifies the reproduction of previous calculations and documentation of the workflow.
For testing and simple projects, it is possible to hard-code all parameters in the project class.
\subsubsection sec_project_args_runfile Setting up a run-file
The usage and format of run-files is described in detail under @ref pag_runfile.
\subsubsection sec_project_args_code Hard-coded arguments
Though it's normally recommended to declare all parameters in the run-file,
parameter values can also be hard-coded in the constructor and/or the validate method of the project class.
Which method to use depends on the processing stage.
The constructor can set default values for rarely changing parameters.
The declarations in the run-file override the defaults from the constructor.
If some parameters need adjusting _after_ the run-file has been loaded,
this can be done in the `validate` method (see the sketch at the end of this section).
The call sequence of the methods is as follows.
1. `Project.__init__`:
The constructor is usually overridden by the project.
The constructor must call the superclass before applying its values.
2. `Project.set_properties`:
Sets the parameters from the run-file and resolves class names.
This method can be overridden if additional classes need resolving after loading the run-file.
It must call the superclass.
3. `Project.validate`: Parameters are validated, i.e., checked and made consistent.
Handler classes are resolved.
The `validate` method or its sub-methods can be overridden by the project.
The inherited method should be called.
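As an illustration of step 3, the following sketch fixes up a directory after the run-file has been applied.
It is only a schematic example, assuming that `validate` takes no arguments besides `self`;
the path is made up, and the actual method contract is described in the code documentation.
~~~~~~{.py}
class MyProject(pmsco.project.Project):
    def validate(self):
        # fall back to a default data directory if the run-file did not set one
        # (the path is an example)
        if not self.directories.get("data"):
            self.directories["data"] = Path.home() / "pmsco" / "data"
        # call the inherited method to keep the standard checks and path resolution
        super().validate()
~~~~~~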
*/
135
docs/src/reports.dox Normal file
@@ -0,0 +1,135 @@
/*! @page pag_reports Reports
\section sec_reports Reports
The main output of PMSCO is the model parameters to R-factor mapping.
By default, it is produced in the form of a text file (.dat) as well as an sqlite3 database file (.db).
Graphical representations of the result data, called _reports_ in PMSCO, can be produced automatically at run-time or
manually after the calculation has ended.
PMSCO provides a number of pre-defined reports as well as an interface for custom reports.
Essentially, a report is defined by a Python class which derives from `pmsco.reports.base.ProjectReport`.
Instances of reports are added to the project's `reports` list during initialization of the calculation job.
They are called by the calculation handlers whenever a new model-level result is available in the database.
While reports typically produce graphics files for diagnostics,
report classes can, in principle, produce any derived data, including data files in other formats.
By default, no report is produced during a project run.
There are several ways to generate reports:
- Add instances of reports to the `reports` list of the project object.
This can be done in the project code or in the @ref pag_runfile.
One or multiple reports (of different classes) can be added and configured.
- Some report modules have their own command line interface.
This allows you to produce a report at any time during or after the project run.
- Lastly, all reports are Python classes and can be instantiated and executed in a Python shell.
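For the first option, a minimal sketch of adding a report in project code could look as follows.
The module path is taken from the tables below, and the attribute values are examples;
check the package for the actual module layout and constructor arguments.
~~~~~~{.py}
from pmsco.reports.population import ConvergencePlot

# e.g. in an overridden setup method of the project class:
report = ConvergencePlot()
report.filename_format = "${base}.convergence"
report.title_format = "${job_name}"
self.reports.append(report)
~~~~~~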
The remainder of this page describes some of the pre-defined reports and their configuration parameters (attributes).
@note Reporting is still under development.
The configuration parameters and behaviour are subject to change, and the documentation may be partially outdated.
Be sure to check the in-line documentation as well as the source code for the latest information.
\subsection sec_reports_common Common Parameters
The reports share some common parameters which may, however, be used differently or ignored by some reports.
| Key | Values | Description |
| --- | --- | --- |
| `filename_format` | template string using `${key}`-type placeholders | Template string for file names of reports. Possible placeholders are listed below. |
| `title_format` | template string using `${key}`-type placeholders | Template string for graph titles. Possible placeholders are listed below. |
| `canvas` | string.<br>Default: `matplotlib.backends.backend_agg.FigureCanvasAgg` (PNG) | A matplotlib figure canvas such as FigureCanvasAgg, FigureCanvasPdf or FigureCanvasSVG. |
The `filename_format` and `title_format` attributes are template strings which can contain `${key}`-type placeholders.
The placeholders are replaced according to the following table.
Some of these values may not be available if you call the reports outside of an optimization run
(e.g., from the command line of a report module).
| Key | Description |
| --- | --- |
| `base` | Base file name. Default: job name |
| `mode` | optimization mode |
| `job_name` | job name |
| `project_name` | project name |
| any directories key | corresponding directories value |
| any job_tags key | corresponding job_tags value |
\subsection sec_reports_convergence Convergence Plot
The convergence plot is a violin plot where each violin represents the R-factor distribution of one generation.
The minimum, maximum and mean values are marked, and the distribution is indicated by the body.
Convergence plots are suitable for genetic or swarm optimizations.
| Key | Values | Description |
| --- | --- | --- |
| __class_name__ | pmsco.reports.population.ConvergencePlot | |
| filename_format | template string using `${key}`-type placeholders | See common section. |
| title_format | template string using `${key}`-type placeholders | See common section. |
\subsection sec_reports_genetic Genetic Chart
A genetic chart is a pseudo-colour representation of the coordinates of each individual in the model space.
The chart shows the amount of diversity in the population
and - by comparing charts of different generations - the changes due to mutation.
The axes are the model parameters (x) and particle number (y).
The colour is mapped from the relative parameter value within the parameter range.
Genetic charts are suitable for genetic or swarm optimizations.
| Key | Values | Description |
| --- | --- | --- |
| __class_name__ | pmsco.reports.population.GeneticPlot | |
| filename_format | template string using `${key}`-type placeholders | See common section. |
| title_format | template string using `${key}`-type placeholders | See common section. |
| cmap | string: 'viridis', 'plasma' (default), 'inferno', 'magma', 'cividis' | Name of colour map supported by matplotlib. |
| params | list of model parameter names | |
In addition to the common template substitutions,
the genetic chart report replaces the following placeholders
of the `filename_format` and `title_format` template strings.
| Key | Description |
| --- | --- |
| `gen` | Generation index (population reports only) |
\subsection sec_reports_swarm Particle Swarm Plot
The particle swarm plot shows the current positions and velocities of particles projected onto two dimensions.
The plot contains three elements:
- a pseudo-colour scatter plot of all R-factors in the background,
- a scatter plot of particle positions,
- a quiver plot indicating the velocities of the particles.
Particle swarm plots are suitable in particle swarm optimization mode only.
| Key | Values | Description |
| --- | --- | --- |
| __class_name__ | pmsco.reports.population.SwarmPlot | |
| filename_format | template string using `${key}`-type placeholders | See common section. |
| title_format | template string using `${key}`-type placeholders | See common section. |
| cmap | string: 'viridis', 'plasma' (default), 'inferno', 'magma', 'cividis' | Name of colour map supported by matplotlib. |
| params | nested list of pairs of model parameter names | |
In addition to the common template substitutions,
the particle swarm plot report replaces the following placeholders
of the `filename_format` and `title_format` template strings.
| Key | Description |
| --- | --- |
| `gen` | Generation index (population reports only) |
| `param0` | Parameter name 0 (population reports only) |
| `param1` | Parameter name 1 (population reports only) |
\subsection sec_reports_misc Miscellaneous
To make a video from swarm or genetic plots, you may use ffmpeg on Linux:
~~~~~~{.sh}
ffmpeg -framerate 5 -i basename-%00d.geneticplot.png -c:v libx264 -profile:v high -crf 20 -pix_fmt yuv420p basename.geneticplot.mp4
~~~~~~
*/
506
docs/src/runfile.dox Normal file
@@ -0,0 +1,506 @@
/*! @page pag_runfile Run File
\section sec_runfile Run File
This section describes the format of a run-file.
Run-files are a flexible way of passing arguments to a PMSCO process.
The benefits are:
- contain all essential parameters to repeat a calculation - no need to remember or record the command line
- avoid cluttering up the command line or frequent changes of source code
- can be versioned or stored separately from the code; whether to maintain a single file or multiple files is up to the user
- any property and sub-property of the project object can be assigned in a generic way - even custom properties that are unknown to PMSCO
- no necessity for the project code to parse the command line
- schema validation can help to find syntax errors while editing
\subsection sec_runfile_how How It Works
Run-files are text files in machine and human readable [JSON](https://en.wikipedia.org/wiki/JSON) format.
In PMSCO, run-files contain dictionaries of parameters to be passed to the project object.
For the calculations, internally, the project object is the main container of calculation parameters, model objects and input data.
Upon launching PMSCO, a generic parser reads the run-file,
constructs the project object from the specified custom project class
and assigns the attributes defined in the run-file.
Run-files are, in effect, a kind of script that assigns data to the project.
The parser does not expect specific data types or classes.
It merely copies data items to the project attributes of the same name.
The validation and interpretation of the data is up to the project object.
The parser handles the following situations:
- Strings, numbers as well as dictionaries and lists of simple objects are assigned directly to project attributes.
If the project class declares a setter method for the attribute, the setter is called.
Else, the existing attribute is overwritten.
Setters can execute custom code to validate the data value.
- If specified in the run-file, the parser creates objects from classes in the namespace of the project module
and recursively assigns their properties.
\note There are no implicit checks of correctness of the assigned data objects!
The author of the run-file must make sure that the run-file is compatible with the project class,
else the calculation process might fail.
There are three ways to check assigned attributes before the calculations are started.
All have to be implemented explicitly by the project maintainer:
1. The run-file can be validated against a JSON schema before launching PMSCO (see below).
Schema validation may catch some obvious mistakes
but is not complete in the sense that it cannot guarantee error-free execution of the project code.
2. The classes used with run-files define property setters.
The setters can raise an exception or post an error in the log.
(The latter won't stop the calculation process.)
3. The project class implements a validation method to check and fix important or error-prone attributes.
It can write warnings and errors to the log, or raise an exception if the process should be aborted.
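As an example of the second mechanism, a project class can guard an attribute with a Python property
so that an invalid value from the run-file is rejected when it is assigned.
This is only a sketch; the attribute name and the check are made up for illustration.
~~~~~~{.py}
class MyProject(pmsco.project.Project):
    # default value of a hypothetical project attribute
    _sample_temperature = 300.0

    @property
    def sample_temperature(self):
        return self._sample_temperature

    @sample_temperature.setter
    def sample_temperature(self, value):
        value = float(value)
        if value <= 0.0:
            raise ValueError("sample_temperature must be positive")
        self._sample_temperature = value
~~~~~~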
\subsection sec_runfile_general General File Format
Run-files must adhere to the [JSON](https://en.wikipedia.org/wiki/JSON) format.
Specifically, a JSON file can declare dictionaries, lists and simple objects
such as strings, numbers and `null`.
The syntax of these basic elements is similar to Python source code (there are some differences, though).
At the top level, a PMSCO run-file contains a dictionary with up to two items:
1. The _project_ item is the most important, it is described in the following under @ref sec_runfile_project.
2. The _schedule_ item is an optional section for passing the parameters to a job queue of a computing cluster.
See @ref sec_runfile_schedule .
\subsection sec_runfile_schema Schema
The structure of a JSON file can be described in a _schema_ file that can be used to check the syntax and structure programmatically.
The `schema/runfile.schema.json` file of the PMSCO distribution describes the structure of a run-file as well as common properties of the project.
The schema is, however, rather basic and does not cover all parameters, conditional cases or custom project properties.
A run-file can be easily validated against the schema while editing in the PyCharm IDE.
Alternatively, the jsonschema validator from the Python distribution can be used on the command line.
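The check can also be scripted with the jsonschema package.
A minimal sketch (the file names are examples, and the run-file must not contain comment lines
if it is loaded with the plain `json` module):
~~~~~~{.py}
import json
import jsonschema

with open("schema/runfile.schema.json") as f:
    schema = json.load(f)
with open("run1.json") as f:
    runfile = json.load(f)

# raises jsonschema.ValidationError if the run-file does not match the schema
jsonschema.validate(instance=runfile, schema=schema)
~~~~~~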
\subsection sec_runfile_project Project Specification
The following minimum run-file from the twoatom project demonstrates how to specify the project:
~~~~~~{.py}
{
    "project": {
        "__module__": "twoatom",
        "__class__": "TwoatomProject",
        "mode": "single",
        "job_name": "twoatom0001"
    }
}
~~~~~~
Here, the `project` keyword denotes the dictionary that is used to construct the project object.
Within the project dictionary, the `__module__` key selects the Python module file that contains the project code,
and `__class__` refers to the name of the actual project class.
Further dictionary items correspond to attributes of the project class.
The module name is the same as would be used in a Python import statement.
It must be findable on the Python path.
Alternatively, a file path may be specified.
PMSCO ensures that the directory containing the `pmsco` and `projects` sub-directories is on the Python path.
The class name must be in the namespace of the loaded module.
As PMSCO starts, it imports the specified module,
constructs an object of the specified project class,
and assigns any further items to project attributes.
In the example above, it creates an object of type `TwoatomProject` from the `twoatom` module
and assigns `single` to the `mode` property and `twoatom0001` to the `job_name` property.
Any attributes not specified in the run-file remain at their default values
that were set by the `__init__` constructor of the project class.
Note that parameter names must start with an alphabetic character, else they are ignored
(useful for comments as JSON does not have a syntax for comments).
Also note that PMSCO does not spell-check parameter names.
The parameter values are just written to the corresponding object attribute.
If a name is misspelled, the value will be written to the wrong attribute.
PMSCO carries out only the most important checks on the given parameter values.
Incorrect values may lead to improper operation or exceptions later in the calculations.
The project class can explicitly check and fix important or error-prone attributes, or report errors.
The following sub-sections describe the most common properties of the project class.
\subsubsection sec_runfile_common Common Arguments
The following table lists some important parameters controlling the calculations.
They are declared in the pmsco.project.Project class.
| Key | Values | Description |
| --- | --- | --- |
| mode | `single` (default), `grid`, `swarm`, `genetic`, `table`, `test`, `validate` | Operation mode. `validate` can be used to check the syntax of the run-file, the process exits before starting calculations. |
| directories | dictionary | This dictionary lists common file paths used in the project. It contains keys such as `home`, `project`, `output` (see documentation of Project class in pmsco.project). |
| output_dir | path | Shortcut for directories["output"] |
| data_dir | path | Shortcut for directories["data"] |
| job_name | string, must be a valid and unique file name (see note below) | Base name for all produced output files. It is recommended to set a unique name for each calculation run. Do not include a path. The path can be set in _output_dir_. |
| cluster_generator | dictionary | Class name and attributes of the cluster generator. See below. |
| atomic_scattering_factory | string<br>Default: InternalAtomicCalculator from pmsco.calculators.calculator | Class name of the atomic scattering calculator. This name must be in the namespace of the project module. |
| multiple_scattering_factory | string<br>Default: EdacCalculator from pmsco.calculators.edac | Class name of the multiple scattering calculator. This name must be in the namespace of the project module. |
| model_space | dictionary | See @ref sec_runfile_space below. |
| domains | list of dictionaries | See @ref sec_runfile_domains below. |
| scans | list of dictionaries | See @ref sec_runfile_scans below. |
| optimizer_params | dictionary | See @ref sec_runfile_optimizer below. |
\note The *job name* parameter appears most visibly as the prefix of output file names.
It is also registered in the `jobs` table of the results database (if used),
and it is used to identify the job with a job scheduling system.
For these reasons, it is important that the job name be unique within the respective subsystem.
Specifically, you need to *provide a new job name each time you start pmsco*, otherwise the job may fail.
It may be more natural to specify the job name on the command line using the `-o` argument
rather than changing the run file every time.
Unfortunately, PMSCO cannot auto-generate, auto-increment or verify the job name.
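For example, the same run-file can be reused for a new calculation by overriding the job name on the command line
(paths and names are examples):
~~~~~~{.sh}
# path/to/pmsco is the directory that contains the __main__.py file
python path/to/pmsco -r twoatom-run.json -o twoatom0002
~~~~~~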
File names specified in a runfile can include an explicit path or a placeholder.
Placeholders have the format `${key}` where `key` must be one of the keys of the `directories` dictionary.
The placeholder will then be replaced by the corresponding value before the calculation starts
(as a part of the pmsco.project.Project.validate method).
The `directories` dictionary can be filled by the project class or in the runfile.
In addition, a number of keys are defined by PMSCO and can be used as placeholders in other directories and file paths.
| Key | Type | Description |
| --- | --- | --- |
| data | absolute | Directory with experimental data. Must be set by user if needed. |
| home | absolute | Home directory of the current user |
| pmsco | absolute | Directory that contains the loaded pmsco.py module. Note: This may be in a site packages directory. |
| output | absolute | Output directory. Must be set by the user. |
| project | absolute | Directory where the project module is located. |
| project_name | relative | Name of the project. By default, the name of the project class. |
| job_name | relative | Name of the calculation job. |
| mode | relative | Calculation mode |
| report | absolute | Report directory. Defaults to `${output}/report`. |
| run | absolute | Directory where the runfile is located (if used). |
| temp | absolute | Directory for temporary files. Currently not used. |
| work | absolute | Current working directory |
Placeholders of absolute paths must be used at the beginning of a path.
Relative paths can be used at any position in a file path.
Some of the keys may have empty values if PMSCO was loaded in a non-standard way.
For verification of the path resolution, all directories are printed to the log file at WARNING level (default).
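A typical `directories` declaration with placeholders could look like this (the paths are examples):
~~~~~~{.py}
{
    "project": {
        // ...
        "directories": {
            "data": "${home}/experiments/twoatom",
            "output": "${home}/pmsco-results"
        }
    }
}
~~~~~~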
The following table lists some common control parameters and metadata
that affect the behaviour of the program but do not affect the calculation results.
The job metadata is used to identify and describe a job in the results database if requested.
| Key | Values | Description |
| --- | --- | --- |
| db_file | new or existing file path or `:memory:` | SQLite3 database file to receive the optimization results. If the database exists, results are inserted under the given job name. If it doesn't exist, a new file is created. If the attribute is `:memory:`, an in-memory database is used internally and flushed at the end of processing. |
| job_tags | dictionary of strings | User-specified job tags in key-value format (metadata). |
| description | string | Description of the calculation job (metadata) |
| time_limit | decimal number<br>Default: 24. | Wall time limit in hours. The optimizers try to finish before the limit. This cannot be guaranteed, however. |
| keep_files | list of file categories | Output file categories to keep after the calculation. Multiple values can be specified and must be separated by spaces. By default, cluster and model (simulated data) of a limited number of best models are kept. See @ref sec_runfile_files below. |
| keep_best | integer number<br>Default: 10 | number of best models for which result files should be kept. |
| keep_levels | integer number<br>Default: 1 | numeric task level down to which files are kept. 1 = scan, 2 = domain, 3 = emitter. |
| log_level | DEBUG, INFO, WARNING, ERROR, CRITICAL | Minimum level of messages that should be added to the log. Empty string turns off logging. |
| log_file | file system path<br>Default: job_name + ".log". | Name of the main log file. Under MPI, the rank of the process is inserted before the extension. The log name is created in the working directory. |
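The following fragment shows how some of these control parameters and metadata might be set in a run-file
(the values are examples):
~~~~~~{.py}
{
    "project": {
        // ...
        "db_file": "${output}/results.db",
        "keep_files": ["cluster", "model", "scan", "population"],
        "keep_best": 20,
        "log_level": "INFO",
        "description": "test of the new cluster generator"
    }
}
~~~~~~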
\subsubsection sec_runfile_space Model Space
The `model_space` parameter is a dictionary of model parameters.
The key is the name of the parameter as used by the cluster and input-formatting code,
the value is a dictionary holding the `start`, `min`, `max`, `step` values to be used by the optimizer.
Instead of `min` and `max` you may declare the `width`, which will center the space on the start value.
All parameter values can be declared as numbers or as simple Python expressions in double quotes.
Expressions are evaluated by the Python `eval` function
and can use all names in the namespace of the project module.
Note that you have to import the `math` or `numpy` modules in your project module
if you want to use their functions.
~~~~~~{.py}
{
    "project": {
        // ...
        "model_space": {
            "dAB": {
                "start": 2.109,
                "min": "2.109 - 0.1",
                "max": "2.109 + 0.1",
                "step": 0.05
            },
            "pAB": {
                "start": "4 * 3.56 / math.sqrt(3.0)",
                "width": 4.0,
                "step": 0.5
            },
            // ...
        }
    }
}
~~~~~~
Alternatively, the `model_space` can be declared as a `ModelSpace` object.
However, this is not described in detail here.
\subsubsection sec_runfile_domains Domains
Domains is a list of dictionaries.
Each dictionary holds keys describing the domain to the cluster and input-formatting code.
The meaning of these keys is up to the project.
An example:
~~~~~~{.py}
{
    "project": {
        // ...
        "domains": [
            {"surface": "Te", "doping": null, "zrot": 0.0},
            {"surface": "Te", "doping": null, "zrot": 60.0}
        ]
    }
}
~~~~~~
\subsection sec_runfile_scans Experimental Scan Files
The pmsco.scan.Scan objects used in the calculation cannot be instantiated from the run-file directly.
Instead, the scans object of the run-file is a list of scan creators/loaders which specify how to create a Scan object.
The pmsco.scan module defines four scan creators: `ScanLoader`, `ScanCreator`, `HoloScanCreator` and `ScanKey`.
The following code block shows examples:
~~~~~~{.py}
{
    "project": {
        // ...
        "scans": [
            {
                "__class__": "ScanCreator",
                "filename": "twoatom_energy_alpha.etpai",
                "emitter": "N",
                "initial_state": "1s",
                "positions": {
                    "e": "np.arange(10, 400, 5)",
                    "t": "0",
                    "p": "0",
                    "a": "np.linspace(-30, 30, 31)"
                }
            },
            {
                "__class__": "ScanLoader",
                "filename": "${project}/twoatom_hemi_250e.etpi",
                "emitter": "N",
                "initial_state": "1s",
                "is_modf": false
            },
            {
                "__class__": "HoloScanCreator",
                "filename": "${project}/twoatom_scan3.etpi",
                "emitter": "N",
                "initial_state": "1s",
                "generator": "pmsco.data.holo_grid",
                "generator_args": {
                    "theta_start": 90,
                    "theta_step": 1,
                    "theta_range": 90,
                    "phi_start": 0,
                    "phi_range": 360,
                    "phi_refinement": 1
                },
                "other_positions": {"e": 250, "a": 0}
            },
            {
                "__class__": "HoloScanCreator",
                "filename": "${project}/twoatom_scan4.etpi",
                "emitter": "N",
                "initial_state": "1s",
                "other_positions": {"e": 250, "a": 0}
            }
        ]
    }
}
~~~~~~
The class name must be specified as it would be called in the custom project module.
For the example shown above, the following import statements are necessary in the `pmsco.projects.twoatom.py` module.
(Other forms of the import statement can be used accordingly.)
~~~~~~{.py}
import numpy as np
import pmsco.data
from pmsco.scan import ScanKey, ScanLoader, ScanCreator, HoloScanCreator
~~~~~~
The *ScanCreator* object creates a scan using Numpy array constructors in `positions`.
In the example above, a two-dimensional rectangular energy-alpha scan grid is created.
The values of the positions axes are passed to Python's `eval` function
and must return a one-dimensional Numpy `ndarray`.
The `emitter` and `initial_state` keys define the probed core level.
The *HoloScanCreator* object creates a _holo scan_, i.e., an angle scan of the theta and phi axes.
The distribution of the grid points is defined by a separate generator function.
Usually, the default pmsco.data.holo_grid function is used
which generates the well-known Osterwalder holo scan
with constant point density in solid angle and equidistant polar steps.
The `generator` and `generator_args` properties have default values.
The two example holo scans above are equivalent,
as the first one above just uses default values explicitly.
If you want to specify a generator function explicitly,
you must import it into the namespace of your project.
E.g. for `pmsco.data.holo_grid` you have to `import pmsco.data`.
The *ScanLoader* object loads a data file, specified under `filename`.
The filename can include a placeholder which is replaced by the corresponding item from Project.directories.
Note that some of the directories (including `project`) are pre-set by PMSCO.
It is recommended to add a `data` key under `directories` in the run-file
if the data files are outside of the PMSCO directory tree.
The `is_modf` key indicates whether the file contains a modulation function (`true`) or intensity (`false`).
In the latter case, the modulation function is calculated after loading.
The *ScanKey* is the shortest scan specification in the run-file.
It should not be used in new projects as it uses hard-coded data links in program code.
ScanKey is a shortcut to a complete scan dictionary in the project object.
The `scan_dict` must be set up in the `__init__` method of the project class.
The `key` item specifies which key of `scan_dict` should be used to create the Scan object.
Each item of `scan_dict` holds a dictionary
that holds the attributes for either a `ScanCreator`, `HoloScanCreator` or a `ScanLoader`.
If it contains a `positions` (`other_positions`) key, it represents a `ScanCreator` (`HoloScanCreator`), else a `ScanLoader`.
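A ScanKey entry in the run-file is then just a reference to one of the keys of `scan_dict`,
for instance (assuming the project defines an entry named `Si2p`):
~~~~~~{.py}
{
    "project": {
        // ...
        "scans": [
            {"__class__": "ScanKey", "key": "Si2p"}
        ]
    }
}
~~~~~~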
\subsection sec_runfile_optimizer Optimizer Parameters
The `optimizer_params` is a dictionary holding one or more of the following items.
| Key | Values | Description |
| --- | --- | --- |
| pop_size | integer<br>The default value is the greater of 4 or the number of parallel calculation processes. | Population size (number of particles) in swarm and genetic optimization mode. |
| seed_file | file system path | Name of the population seed file. Population data of previous optimizations can be used to seed a new optimization. The file must have the same structure as the .pop or .dat files. See @ref pmsco.optimizers.population.Population.seed_from_file. |
| seed_limit | integer | Number of seed models to import. |
| recalc_seed | true or false | If true, the seed models are calculated. Otherwise, the R-factor from the seed file is used as result. Use true if the seed file contains no or outdated R-factors. |
| table_source | file system path | Name of the model table file in table scan mode. |
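An example `optimizer_params` declaration for a swarm optimization could look as follows (the values are examples):
~~~~~~{.py}
{
    "project": {
        // ...
        "mode": "swarm",
        "optimizer_params": {
            "pop_size": 24,
            "seed_file": "${output}/twoatom0001.dat",
            "seed_limit": 10,
            "recalc_seed": false
        }
    }
}
~~~~~~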
\subsubsection sec_runfile_files File Categories
The following category names can be used with the `keep_files` option.
Multiple names can be specified as a list.
| Category | Description | Default Action |
| --- | --- | --- |
| all | shortcut to include all categories | |
| input | raw input files for calculator, including cluster and phase files in custom format | delete |
| output | raw output files from calculator | delete |
| atomic | atomic scattering and emission files in portable format | delete |
| cluster | cluster files in portable XYZ format for report | keep |
| debug | debug files | delete |
| model | output files in ETPAI format: complete simulation (a_-1_-1_-1_-1) | keep |
| scan | output files in ETPAI format: scan (a_b_-1_-1_-1) | keep |
| domain | output files in ETPAI format: domain (a_b_c_-1_-1) | delete |
| emitter | output files in ETPAI format: emitter (a_b_c_d_-1) | delete |
| region | output files in ETPAI format: region (a_b_c_d_e) | delete |
| report | final report of results | keep always |
| population | final state of particle population | keep |
| rfac | files related to models which give bad r-factors, see warning below | delete |
\note
The `report` category is always kept and cannot be turned off.
The `model` category is always kept in single calculation mode.
\warning
If you want to specify `rfac` with the `keep_files` option,
you have to add the file categories that you want to keep, e.g.,
`"keep_files": ["rfac", "cluster", "model", "scan", "population"]`
(to return the default categories for all calculated models).
Do not specify `rfac` alone as this will effectively not return any file.
\subsection sec_runfile_reports Reports
Run-time graphical reports are configured in the `reports` section.
The section is organized as a list of dictionaries.
Each dictionary sets up a specific report.
For example:
~~~~~~{.py}
{
    "project": {
        // ...
        "reports": [
            {
                "__class__": "ConvergencePlot",
                "filename_format": "${base}.convergence",
                "title_format": "my_calc"
            },
            {
                "__class__": "SwarmPlot",
                "filename_format": "${base}-${param0}-${param1}-${gen}.swarmplot",
                "title_format": "my_calc ${param0}-${param1} gen ${gen}",
                "params": [["A", "B"], ["C", "D"]]
            }
        ]
    }
}
~~~~~~
The class name must be specified as it would be called in the custom project module.
For the example above, the import section of the project must include:
~~~~~~{.py}
from pmsco.reports.convergence import ConvergencePlot
from pmsco.reports.swarm import SwarmPlot
~~~~~~
For details on reports and their configuration, see @ref sec_reports.
\subsection sec_runfile_schedule Job Scheduling
To submit a job to a resource manager such as Slurm, add a `schedule` section to the run file
(section ordering is not important):
~~~~~~{.py}
{
    "schedule": {
        "__module__": "pmsco.schedule",
        "__class__": "PsiRaSchedule",
        "nodes": 1,
        "tasks_per_node": 24,
        "walltime": "2:00",
        "manual_run": true,
        "enabled": true
    },
    "project": {
        "__module__": "projects.twoatom.twoatom",
        "__class__": "TwoatomProject",
        "mode": "single",
        "output_file": "${home}/pmsco/twoatom0001",
        ...
    }
}
~~~~~~
In the same way as for the project, the `__module__` and `__class__` keys select the class that handles the job submission.
In this example, it is pmsco.schedule.PsiRaSchedule which is tied to the Ra cluster at PSI.
For other machines, you can sub-class one of the classes in the pmsco.schedule module and include it in your project package.
The derived job submission class must prepare the code, run file and job script, and submit the job to the queue.
It should copy the code to the calculation directory to avoid version conflicts if the user continues to edit the code.
Compilation of the code can be done before submission or as a part of the job script.
@note It is difficult to check the run file and code against errors that may abort job execution.
New code and run files should be tested with a modified, fast-running calculation.
The parameters of pmsco.schedule.PsiRaSchedule are as follows.
Information about the computing nodes and partitions can be printed by the `sinfo -Nel` and `sinfo --long` commands.
| Key | Values | Description |
| --- | --- | --- |
| nodes | integer: 1..2 | Number of compute nodes (main boards on Ra). The maximum number available for PEARL is 2. |
| tasks_per_node | integer: 1..24, 32 | Number of tasks (CPU cores on Ra) per node. Jobs with less than 24 tasks are assigned to the shared partition. |
| wall_time | string: [days-]hours[:minutes[:seconds]] <br> dict: with any combination of days, hours, minutes, seconds | Maximum run time (wall time) of the job. |
| manual | bool | Manual submission (true) or automatic submission (false). Manual submission allows you to inspect the job files before submission. |
| enabled | bool | Enable scheduling (true). Otherwise, the calculation is started directly (false). |
@note The calculation job may run in a different working directory than the current one.
It is important to specify absolute data and output directories in the run file (project/directories section).
Placeholders like `${home}` can be used to make run files portable, cf. @ref sec_run_dirs.
*/
@@ -38,15 +38,15 @@ custom_scan [label="scan\nconfiguration", shape=note];
{rank=same; custom_scan; create_scan; combine_scan;}
custom_scan -> create_scan [lhead=cluster_scan];
subgraph cluster_symmetry {
label="symmetry handler";
subgraph cluster_domain {
label="domain handler";
rank=same;
create_symmetry [label="define\nsymmetry\ntasks"];
combine_symmetry [label="gather\nsymmetry\nresults"];
create_model_space [label="define\ndomain\ntasks"];
combine_domain [label="gather\ndomain\nresults"];
}
custom_symmetry [label="symmetry\ndefinition", shape=cds];
{rank=same; create_symmetry; combine_symmetry; custom_symmetry;}
custom_symmetry -> combine_symmetry [lhead=cluster_symmetry];
custom_domain [label="domain\ndefinition", shape=cds];
{rank=same; create_model_space; combine_domain; custom_domain;}
custom_domain -> combine_domain [lhead=cluster_domain];
subgraph cluster_emitter {
label="emitter handler";
@@ -80,11 +80,11 @@ create_cluster -> edac;
create_model -> create_scan [label="level 1 tasks"];
evaluate_model -> combine_scan [label="level 1 results", dir=back];
create_scan -> create_symmetry [label="level 2 tasks"];
combine_scan -> combine_symmetry [label="level 2 results", dir=back];
create_scan -> create_model_space [label="level 2 tasks"];
combine_scan -> combine_domain [label="level 2 results", dir=back];
create_symmetry -> create_emitter [label="level 3 tasks"];
combine_symmetry -> combine_emitter [label="level 3 results", dir=back];
create_model_space -> create_emitter [label="level 3 tasks"];
combine_domain -> combine_emitter [label="level 3 results", dir=back];
create_emitter -> create_region [label="level 4 tasks"];
combine_emitter -> combine_region [label="level 4 results", dir=back];
@@ -28,7 +28,7 @@ remove_task_file()
class CalcID {
model
scan
sym
domain
emit
region
}
@@ -43,15 +43,15 @@ parent = 2, -1, -1, -1, -1
model = {'d': 7}
}
Scan11 o.. Sym111
Scan11 o.. Dom111
object Sym111 {
object Dom111 {
id = 1, 1, 1, -1, -1
parent = 1, 1, -1, -1, -1
model = {'d': 5}
}
Sym111 o.. Emitter1111
Dom111 o.. Emitter1111
object Emitter1111 {
id = 1, 1, 1, 1, -1
@@ -90,18 +90,18 @@ scan
object ScanHandler
object "Sym: CalculationTask" as Sym {
object "Domain: CalculationTask" as Domain {
model
scan
symmetry
domain
}
object "SymmetryHandler" as SymHandler
object "DomainHandler" as DomainHandler
object "Emitter: CalculationTask" as Emitter {
model
scan
symmetry
domain
emitter
}
@@ -110,7 +110,7 @@ object EmitterHandler
object "Region: CalculationTask" as Region {
model
scan
symmetry
domain
emitter
region
}
@@ -120,14 +120,14 @@ object RegionHandler
Root "1" o.. "1..*" Model
Model "1" o.. "1..*" Scan
Scan "1" o.. "1..*" Sym
Sym "1" o.. "1..*" Emitter
Scan "1" o.. "1..*" Domain
Domain "1" o.. "1..*" Emitter
Emitter "1" o.. "1..*" Region
(Root, Model) .. ModelHandler
(Model, Scan) .. ScanHandler
(Scan, Sym) .. SymHandler
(Sym, Emitter) .. EmitterHandler
(Scan, Domain) .. DomainHandler
(Domain, Emitter) .. EmitterHandler
(Emitter, Region) .. RegionHandler
@enduml
@@ -4,7 +4,7 @@
class CalculationTask {
model
scan
symmetry
domain
emitter
region
..
@@ -35,7 +35,7 @@ class Scan {
alphas
}
class Symmetry {
class Domain {
index
..
rotation
@@ -55,13 +55,13 @@ class Region {
CalculationTask *-- Model
CalculationTask *-- Scan
CalculationTask *-- Symmetry
CalculationTask *-- Domain
CalculationTask *-- Emitter
CalculationTask *-- Region
class Project {
scans
symmetries
domains
model_handler
cluster_generator
}
@@ -78,7 +78,7 @@ class ModelHandler {
Model ..> ModelHandler
Scan ..> Project
Symmetry ..> Project
Domain ..> Project
Emitter ..> ClusterGenerator
Region ..> Project
@@ -9,14 +9,6 @@ name
code
}
class Scan << (T,orchid) >> {
id
..
job_id
..
name
}
class Job << (T,orchid) >> {
id
..
@@ -30,6 +22,22 @@ datetime
description
}
class Tag << (T,orchid) >> {
id
..
..
key
}
class JobTag << (T,orchid) >> {
id
..
tag_id
job_id
..
value
}
class Model << (T,orchid) >> {
id
..
@@ -46,10 +54,12 @@ id
model_id
..
scan
sym
domain
emit
region
rfac
timestamp
secs
}
class Param << (T,orchid) >> {
@@ -66,11 +76,13 @@ param_id
model_id
..
value
delta
}
Project "1" *-- "*" Job
Job "1" *-- "*" JobTag
Tag "1" *-- "*" JobTag
Job "1" *-- "*" Model
Job "1" *-- "*" Scan
Param "1" *-- "*" ParamValue
Model "1" *-- "*" ParamValue
Model "1" *-- "*" Result

View File

@@ -20,7 +20,7 @@ repeat
partition "generate tasks" {
:define model tasks;
:define scan tasks;
:define symmetry tasks;
:define domain tasks;
:define emitter tasks;
:define region tasks;
}
@@ -34,7 +34,7 @@ end fork
partition "collect results" {
:gather region results;
:gather emitter results;
:gather symmetry results;
:gather domain results;
:gather scan results;
:gather model results;
}
@@ -5,10 +5,10 @@ package pmsco {
mode
code
scans
symmetries
domains
{abstract} create_cluster()
{abstract} create_params()
{abstract} create_domain()
{abstract} create_model_space()
}
}
@@ -18,7 +18,7 @@ package projects {
__init__()
create_cluster()
create_params()
create_domain()
create_model_space()
}
}
@@ -4,13 +4,13 @@ abstract class Project {
mode : str = "single"
code : str = "edac"
scans : Scan [1..*]
symmetries : dict [1..*]
domains : dict [1..*]
cluster_generator : ClusterGenerator
handler_classes
files : FileTracker
{abstract} create_cluster() : Cluster
{abstract} create_params() : Params
{abstract} create_domain() : Domain
{abstract} create_params() : CalculatorParams
{abstract} create_model_space() : ModelSpace
}
class Scan {
@@ -28,7 +28,7 @@ class Scan {
import_scan_file()
}
class Domain {
class ModelSpace {
start : dict
min : dict
max : dict
@@ -37,7 +37,7 @@ class Domain {
get_param(name)
}
class Params {
class CalculatorParams {
title
comment
cluster_file
@@ -0,0 +1,86 @@
@startuml
'https://plantuml.com/class-diagram
class ConfigurableObject
class Scan {
filename: str
raw_data: numpy.ndarray
dtype: numpy.dtype
modulation: numpy.ndarray
modulation_func: Callable
modulation_args: Dict
rfactor_func: Callable
rfactor_args: Dict
mode: List
emitter: str
initial_state: str
positions: Dict
__init__()
copy()
load()
define_scan()
import_scan_file()
analyse_raw_data()
generate_holo_scan()
}
class ScanSpec {
filename: str
emitter: str
initial_state: str
modulation_func: Callable
modulation_args: Dict
rfactor_func: Callable
rfactor_args: Dict
__init__()
load()
}
class ScanKey {
project: pmsco.project.Project
key: str
__init__()
load()
}
class ScanLoader {
is_modf: bool
patch: Dict
__init__()
load()
}
class ScanCreator {
positions: Dict
__init__()
load()
}
class HoloScanCreator {
generator: Callable
generator_args: Dict
__init__()
load()
set_property()
}
ConfigurableObject <|-- ScanSpec
ConfigurableObject <|-- ScanKey
ScanSpec <|-- ScanCreator
ScanSpec <|-- ScanLoader
ScanSpec <|-- HoloScanCreator
ScanKey --> ScanCreator: creates
ScanKey --> HoloScanCreator: creates
ScanKey --> ScanLoader: creates
ScanLoader --> Scan: creates
ScanCreator --> Scan: creates
HoloScanCreator --> Scan: creates
@enduml
@@ -25,7 +25,7 @@ stop
|pmsco|
start
:define task (model, scan, symmetry, emitter, region);
:define task (model, scan, domain, emitter, region);
|project|
:create cluster;
:create parameters;
@@ -2,21 +2,19 @@
skinparam componentStyle uml2
component "project" as project
component "PMSCO" as pmsco
component "project" as project
component "scattering code\n(calculator)" as calculator
interface "command line" as cli
interface "input files" as input
interface "output files" as output
interface "experimental data" as data
interface "results" as results
interface "output files" as output
cli --> pmsco
data -> project
project ..> pmsco
pmsco ..> project
pmsco ..> calculator
cli --> project
input -> calculator
calculator -> output
pmsco -> results
@@ -5,16 +5,16 @@ package pmsco {
mode
code
scans
symmetries
domains
cluster_generator
handler_classes
__
{abstract} create_cluster()
{abstract} create_params()
{abstract} create_domain()
{abstract} create_model_space()
..
combine_scans()
combine_symmetries()
combine_domains()
combine_emitters()
calc_modulation()
calc_rfactor()
@@ -34,9 +34,9 @@ package projects {
setup()
..
create_params()
create_domain()
create_model_space()
..
combine_symmetries()
combine_domains()
}
class UserClusterGenerator {
@@ -0,0 +1,30 @@
FROM python:3.12
# docker container to build PMSCO documentation
WORKDIR /app
RUN apt-get update && apt-get install -y --no-install-recommends \
default-jre \
doxygen \
gawk \
git \
graphviz \
pandoc \
wget \
&& rm -rf /var/lib/apt/lists/*
RUN pip install --no-cache-dir \
doxypypy \
meson \
meson-python \
ninja \
pynose
RUN wget -O plantuml.jar https://sourceforge.net/projects/plantuml/files/plantuml.jar/download
ENV PLANTUML_JAR_PATH=/app/plantuml.jar
COPY . .
CMD ["sh"]

View File

@@ -1,118 +0,0 @@
BootStrap: debootstrap
OSVersion: bionic
MirrorURL: http://ch.archive.ubuntu.com/ubuntu/
%help
a singularity container for PMSCO.
git clone requires an ssh key for git.psi.ch.
try agent forwarding (-A option to ssh).
#%setup
# executed on the host system outside of the container before %post
#
# this will be inside the container
# touch ${SINGULARITY_ROOTFS}/tacos.txt
# this will be on the host
# touch avocados.txt
#%files
# files are copied before %post
#
# this copies to root
# avocados.txt
# this copies to /opt
# avocados.txt /opt
#
# this does not work
# ~/.ssh/known_hosts /etc/ssh/ssh_known_hosts
# ~/.ssh/id_rsa /etc/ssh/id_rsa
%labels
Maintainer Matthias Muntwiler
Maintainer_Email matthias.muntwiler@psi.ch
Python_Version 2.7
%environment
export PATH="/usr/local/miniconda3/bin:$PATH"
export PYTHON_VERSION=2.7
export SINGULAR_BRANCH="singular"
export LC_ALL=C
%post
export PYTHON_VERSION=2.7
export LC_ALL=C
sed -i 's/$/ universe/' /etc/apt/sources.list
apt-get update
apt-get -y install \
binutils \
build-essential \
doxygen \
doxypy \
f2c \
g++ \
gcc \
gfortran \
git \
graphviz \
libblas-dev \
liblapack-dev \
libopenmpi-dev \
make \
nano \
openmpi-bin \
openmpi-common \
sqlite3 \
wget
apt-get clean
wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O ~/miniconda.sh
bash ~/miniconda.sh -b -p /usr/local/miniconda3
export PATH="/usr/local/miniconda3/bin:$PATH"
conda create -q --yes -n pmsco python=${PYTHON_VERSION}
. /usr/local/miniconda3/bin/activate pmsco
conda install -q --yes -n pmsco \
pip \
"numpy>=1.13" \
scipy \
ipython \
mpi4py \
matplotlib \
nose \
mock \
future \
statsmodels \
swig
conda clean --all -y
/usr/local/miniconda3/envs/pmsco/bin/pip install periodictable attrdict fasteners
#%test
# test the image after build
%runscript
# executes command from command line
. /usr/local/miniconda3/bin/activate pmsco
exec echo "$@"
%apprun install
. /usr/local/miniconda3/bin/activate pmsco
cd ~
git clone https://git.psi.ch/pearl/pmsco.git pmsco
cd pmsco
git checkout develop
git checkout -b ${SINGULAR_BRANCH}
make all
nosetests
%apprun python
. /usr/local/miniconda3/bin/activate pmsco
exec python "${@}"
%apprun conda
. /usr/local/miniconda3/bin/activate pmsco
exec conda "${@}"
@@ -1,12 +1,13 @@
BootStrap: debootstrap
OSVersion: bionic
OSVersion: focal
MirrorURL: http://ch.archive.ubuntu.com/ubuntu/
%help
a singularity container for PMSCO.
A singularity container for PMSCO.
git clone requires an ssh key for git.psi.ch.
try agent forwarding (-A option to ssh).
singularity run -e pmsco.sif path/to/pmsco -r path/to/your-runfile
path/to/pmsco must point to the directory that contains the __main__.py file.
#%setup
# executed on the host system outside of the container before %post
@@ -31,25 +32,28 @@ try agent forwarding (-A option to ssh).
%labels
Maintainer Matthias Muntwiler
Maintainer_Email matthias.muntwiler@psi.ch
Python_Version 3
Python_Version 3.8
%environment
export PATH="/usr/local/miniconda3/bin:$PATH"
export PYTHON_VERSION=3
export SINGULAR_BRANCH="singular"
export LC_ALL=C
export PYTHON_VERSION=3
export CONDA_ROOT=/opt/miniconda
export PLANTUML_JAR_PATH=/opt/plantuml/plantuml.jar
export SINGULAR_BRANCH="singular"
%post
export PYTHON_VERSION=3
export LC_ALL=C
export PYTHON_VERSION=3.8
export CONDA_ROOT=/opt/miniconda
export PLANTUML_ROOT=/opt/plantuml
sed -i 's/$/ universe/' /etc/apt/sources.list
apt-get update
apt-get -y install \
binutils \
build-essential \
default-jre \
doxygen \
doxypy \
f2c \
g++ \
gcc \
@@ -59,59 +63,75 @@ try agent forwarding (-A option to ssh).
libblas-dev \
liblapack-dev \
libopenmpi-dev \
make \
nano \
openmpi-bin \
openmpi-common \
sqlite3 \
wget
apt-get clean
wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O ~/miniconda.sh
bash ~/miniconda.sh -b -p /usr/local/miniconda3
export PATH="/usr/local/miniconda3/bin:$PATH"
wget https://github.com/conda-forge/miniforge/releases/latest/download/Miniforge3-Linux-x86_64.sh -O ~/miniforge3.sh
bash ~/miniforge3.sh -b -p ${CONDA_ROOT}
. ${CONDA_ROOT}/etc/profile.d/conda.sh
conda activate base
conda create -q --yes -n pmsco python=${PYTHON_VERSION}
. /usr/local/miniconda3/bin/activate pmsco
conda install -q --yes -n pmsco \
pip \
"numpy>=1.13" \
scipy \
ipython \
mpi4py \
matplotlib \
nose \
mock \
conda install -q --yes -n pmsco -c conda-forge \
commentjson \
fasteners \
future \
gitpython
ipython \
ipykernel \
jsonschema \
h5py \
matplotlib \
meson \
mock \
pynose \
"numpy>=1.13" \
pandas \
periodictable \
pip \
scikit-learn \
scipy \
seaborn \
sqlalchemy \
statsmodels \
swig
conda clean --all -y
/usr/local/miniconda3/envs/pmsco/bin/pip install periodictable attrdict fasteners
${CONDA_ROOT}/envs/pmsco/bin/pip install meson-python mpi4py netgraph networkx doxypypy
mkdir ${PLANTUML_ROOT}
wget -O ${PLANTUML_ROOT}/plantuml.jar https://sourceforge.net/projects/plantuml/files/plantuml.jar/download
#%test
# test the image after build
%runscript
# executes command from command line
source /usr/local/miniconda3/bin/activate pmsco
exec echo "$@"
. ${CONDA_ROOT}/etc/profile.d/conda.sh
conda activate pmsco
exec python "$@"
%apprun install
source /usr/local/miniconda3/bin/activate pmsco
. ${CONDA_ROOT}/etc/profile.d/conda.sh
conda activate pmsco
cd ~
git clone https://git.psi.ch/pearl/pmsco.git pmsco
cd pmsco
git checkout develop
git checkout master
git checkout -b ${SINGULAR_BRANCH}
make all
nosetests
%apprun python
source /usr/local/miniconda3/bin/activate pmsco
exec python "${@}"
%apprun conda
source /usr/local/miniconda3/bin/activate pmsco
exec conda "${@}"
meson setup build
cd build
meson compile
meson install
meson test
%apprun compile
. ${CONDA_ROOT}/etc/profile.d/conda.sh
conda activate pmsco
cd build
meson compile
meson install
meson test

View File

@@ -12,8 +12,8 @@ Vagrant.configure("2") do |config|
# Every Vagrant development environment requires a box. You can search for
# boxes at https://vagrantcloud.com/search.
config.vm.box = "singularityware/singularity-2.4"
config.vm.box_version = "2.4"
config.vm.box = "sylabs/singularity-3.7-ubuntu-bionic64"
config.vm.box_version = "3.7"
# Disable automatic box update checking. If you disable this, then
# boxes will only be checked for updates when the user runs

View File

@@ -1,55 +0,0 @@
SHELL=/bin/sh
# makefile for all programs, modules and documentation
#
# required libraries for LOESS module: libblas, liblapack, libf2c
# (you may have to set soft links so that linker finds them)
#
# on shared computing systems (high-performance clusters)
# you may have to switch the environment before running this script.
#
# note: the public distribution does not include third-party code
# (EDAC in particular) because of incompatible license terms.
# please obtain such code from the original authors
# and copy it to the proper directory before compilation.
#
# the MSC and MUFPOT programs are currently not used.
# they are not built by the top-level targets all and bin.
#
# the make system uses the compiler executables of the current environment.
# to override the executables, you may set the following variables.
# to switch between python versions, however, the developers recommend miniconda.
#
# PYTHON = python executable (default: python)
# PYTHONOPTS = python options (default: none)
# CC = C and Fortran compiler executable (default: gcc)
# CCOPTS = C compiler options (default: none)
# CXX = C++ compiler executable (default: g++)
# CXXOPTS = C++ compiler options (default: none)
#
# make all PYTHON=/usr/bin/python2.7
#
# or:
#
# export PYTHON=/usr/bin/python2.7
# make all
#
.PHONY: all bin docs clean edac loess msc mufpot phagen
PMSCO_DIR = pmsco
DOCS_DIR = docs
all: edac loess docs
bin: edac loess
edac loess msc mufpot phagen:
$(MAKE) -C $(PMSCO_DIR)
docs:
$(MAKE) -C $(DOCS_DIR)
clean:
$(MAKE) -C $(PMSCO_DIR) clean
$(MAKE) -C $(DOCS_DIR) clean

View File

View File

@@ -8,16 +8,13 @@ python pmsco [pmsco-arguments]
@endverbatim
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from pathlib import Path
import sys
import os.path
file_dir = os.path.dirname(__file__) or '.'
root_dir = os.path.join(file_dir, '..')
root_dir = os.path.abspath(root_dir)
sys.path[0] = root_dir
pmsco_root = Path(__file__).resolve().parent.parent
if str(pmsco_root) not in sys.path:
sys.path.insert(0, str(pmsco_root))
if __name__ == '__main__':
import pmsco.pmsco

View File

@@ -42,7 +42,7 @@ class Calculator(object):
or <code>output_file + '.etpai'</code> depending on scan mode.
all other intermediate files are deleted unless keep_temp_files is True.
@param params: a pmsco.project.Params object with all necessary values except cluster and output files set.
@param params: a pmsco.project.CalculatorParams object with all necessary values except cluster and output files set.
@param cluster: a pmsco.cluster.Cluster(format=FMT_EDAC) object with all atom positions set.

View File

@@ -11,23 +11,23 @@ Licensed under the Apache License, Version 2.0 (the "License"); @n
http://www.apache.org/licenses/LICENSE-2.0
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import logging
import numpy as np
import os
import pmsco.calculators.calculator as calculator
from pmsco.compat import open
import pmsco.data as md
import pmsco.cluster as mc
import pmsco.edac.edac as edac
from pmsco.helpers import BraceMessage as BMsg
logger = logging.getLogger(__name__)
try:
import edac
except (ImportError, ModuleNotFoundError) as e:
edac = None
logger.critical("Error importing the edac package.", exc_info=e)
class EdacCalculator(calculator.Calculator):
def write_input_file(self, params, scan, filepath):
@@ -49,7 +49,7 @@ class EdacCalculator(calculator.Calculator):
if alpha is defined, theta is implicitly set to normal emission! (to be generalized)
@param params: a pmsco.project.Params object with all necessary values except cluster and output files set.
@param params: a pmsco.project.CalculatorParams object with all necessary values except cluster and output files set.
@param scan: a pmsco.project.Scan() object describing the experimental scanning scheme.
@@ -59,7 +59,7 @@ class EdacCalculator(calculator.Calculator):
"""
files = {}
with open(filepath, "w") as f:
with open(filepath, "wt", encoding="latin1") as f:
f.write("verbose off\n")
f.write("cluster input {0}\n".format(params.cluster_file))
f.write("emitters {0:d} l(A)\n".format(len(params.emitters)))
@@ -173,7 +173,7 @@ class EdacCalculator(calculator.Calculator):
f.write(" ".join(format(order, "d") for order in params.orders) + "\n")
f.write("emission angle window {0:F}\n".format(params.angular_resolution / 2.0))
# scattering factor output (see project.Params.phase_output_classes)
# scattering factor output (see project.CalculatorParams.phase_output_classes)
if params.phase_output_classes is not None:
fn = "{0}.clu".format(params.output_file)
f.write("cluster output l(A) {fn}\n".format(fn=fn))
@@ -197,7 +197,7 @@ class EdacCalculator(calculator.Calculator):
"""
run EDAC with the given parameters and cluster.
@param params: a pmsco.project.Params object with all necessary values except cluster and output files set.
@param params: a pmsco.project.CalculatorParams object with all necessary values except cluster and output files set.
@param cluster: a pmsco.cluster.Cluster(format=FMT_EDAC) object with all atom positions set.
@@ -219,8 +219,10 @@ class EdacCalculator(calculator.Calculator):
dat_filename = out_filename
if params.fixed_cluster:
etpi_filename = base_filename + ".etpai"
dtype = md.DTYPE_ETPAI
else:
etpi_filename = base_filename + ".etpi"
dtype = md.DTYPE_ETPI
# fix EDAC particularities
params.cluster_file = clu_filename
@@ -246,13 +248,10 @@ class EdacCalculator(calculator.Calculator):
result_etpi['e'] -= params.work_function
if 't' in scan.mode and 'p' in scan.mode:
hemi_tpi = scan.raw_data.copy()
hemi_tpi['i'] = 0.0
try:
hemi_tpi['s'] = 0.0
except ValueError:
pass
result_etpi = md.interpolate_hemi_scan(result_etpi, hemi_tpi)
dest_tpi = np.zeros(scan.raw_data.shape, dtype)
dest_tpi['t'] = scan.thetas
dest_tpi['p'] = scan.phis
result_etpi = md.interpolate_hemi_scan(result_etpi, dest_tpi)
if params.fixed_cluster:
expected_shape = max(scan.energies.shape[0], 1) * max(scan.alphas.shape[0], 1)

View File

@@ -18,7 +18,7 @@ from __future__ import division
from __future__ import print_function
import pmsco.calculators.calculator as calculator
import pmsco.data as md
import pmsco.msc.msc as msc
import subprojects.msc.msc as msc
import logging
logger = logging.getLogger(__name__)
@@ -62,7 +62,7 @@ class MscCalculator(calculator.Calculator):
"""
run the MSC program with the given parameters and cluster.
@param params: a project.Params() object with all necessary values except cluster and output files set.
@param params: a project.CalculatorParams() object with all necessary values except cluster and output files set.
@param cluster: a cluster.Cluster(format=FMT_MSC) object with all atom positions set.

View File

@@ -1,43 +0,0 @@
SHELL=/bin/sh
# makefile for PHAGEN program and module
#
# the PHAGEN source code is not included in the public distribution.
# please obtain the PHAGEN code from the original author,
# and copy it to this directory before compilation.
#
# see the top-level makefile for additional information.
.SUFFIXES:
.SUFFIXES: .c .cpp .cxx .exe .f .h .i .o .py .pyf .so
.PHONY: all clean phagen
FC?=gfortran
F2PY?=f2py
F2PYOPTS?=
CC?=gcc
CCOPTS?=
SWIG?=swig
SWIGOPTS?=
PYTHON?=python
PYTHONOPTS?=
PYTHONINC?=
PYTHON_CONFIG = ${PYTHON}-config
PYTHON_CFLAGS ?= $(shell ${PYTHON_CONFIG} --cflags)
PYTHON_EXT_SUFFIX ?= $(shell ${PYTHON_CONFIG} --extension-suffix)
all: phagen
phagen: phagen.exe phagen$(EXT_SUFFIX)
phagen.exe: phagen_scf.f msxas3.inc msxasc3.inc
$(FC) $(FCOPTS) -o phagen.exe phagen_scf.f
phagen.pyf: | phagen_scf.f
$(F2PY) -h phagen.pyf -m phagen phagen_scf.f only: libmain
phagen$(EXT_SUFFIX): phagen_scf.f phagen.pyf msxas3.inc msxasc3.inc
$(F2PY) -c $(F2PYOPTS) -m phagen phagen.pyf phagen_scf.f
clean:
rm -f *.so *.o *.exe

View File

@@ -1,102 +0,0 @@
--- phagen_scf.orig.f 2019-06-05 16:45:52.977855859 +0200
+++ phagen_scf.f 2019-05-09 16:32:35.790286429 +0200
@@ -174,6 +174,99 @@
1100 format(//,1x,' ** phagen terminated normally ** ',//)
end
+
+c-----------------------------------------------------------------------
+ subroutine libmain(infile,outfile,etcfile)
+c main calculation routine
+c entry point for external callers
+c
+c infile: name of parameter input file
+c
+c outfile: base name of output files
+c output files with endings .list, .clu, .pha, .tl, .rad
+c will be created
+c-----------------------------------------------------------------------
+ implicit real*8 (a-h,o-z)
+c
+ include 'msxas3.inc'
+ include 'msxasc3.inc'
+
+ character*60 infile,outfile,etcfile
+ character*70 listfile,clufile,tlfile,radfile,phafile
+
+c
+c.. constants
+ antoau = 0.52917715d0
+ pi = 3.141592653589793d0
+ ev = 13.6058d0
+ zero = 0.d0
+c.. threshold for linearity
+ thresh = 1.d-4
+c.. fortran io units
+ idat = 5
+ iwr = 6
+ iphas = 30
+ iedl0 = 31
+ iwf = 32
+ iof = 17
+
+ iii=LnBlnk(outfile)+1
+ listfile=outfile
+ listfile(iii:)='.list'
+ clufile=outfile
+ clufile(iii:)='.clu'
+ phafile=outfile
+ phafile(iii:)='.pha'
+ tlfile=outfile
+ tlfile(iii:)='.tl'
+ radfile=outfile
+ radfile(iii:)='.rad'
+
+ open(idat,file=infile,form='formatted',status='old')
+ open(iwr,file=listfile,form='formatted',status='unknown')
+ open(10,file=clufile,form='formatted',status='unknown')
+ open(35,file=tlfile,form='formatted',status='unknown')
+ open(55,file=radfile,form='formatted',status='unknown')
+ open(iphas,file=phafile,form='formatted',status='unknown')
+
+ open(iedl0,form='unformatted',status='scratch')
+ open(iof,form='unformatted',status='scratch')
+ open(unit=21,form='unformatted',status='scratch')
+ open(60,form='formatted',status='scratch')
+ open(50,form='formatted',status='scratch')
+ open(unit=13,form='formatted',status='scratch')
+ open(unit=14,form='formatted',status='scratch')
+ open(unit=11,status='scratch')
+ open(unit=iwf,status='scratch')
+ open(unit=33,status='scratch')
+ open(unit=66,status='scratch')
+
+ call inctrl
+ call intit(iof)
+ call incoor
+ call calphas
+
+ close(idat)
+ close(iwr)
+ close(10)
+ close(35)
+ close(55)
+ close(iphas)
+ close(iedl0)
+ close(iof)
+ close(60)
+ close(50)
+ close(13)
+ close(14)
+ close(11)
+ close(iwf)
+ close(33)
+ close(66)
+ close(21)
+
+ endsubroutine
+
+
subroutine inctrl
implicit real*8 (a-h,o-z)
include 'msxas3.inc'

View File

@@ -2,33 +2,41 @@
@package pmsco.calculators.phagen.runner
Natoli/Sebilleau PHAGEN interface
this module runs the PHAGEN program to calculate scattering factors and radial matrix element.
This module runs the PHAGEN program to calculate scattering factors and radial matrix elements.
Requires PHAGEN version 2.2 from https://git.ipr.univ-rennes.fr/epsi/msspec_python3.git (contained in subprojects).
@author Matthias Muntwiler
@copyright (c) 2015-19 by Paul Scherrer Institut @n
@copyright (c) 2015-23 by Paul Scherrer Institut @n
Licensed under the Apache License, Version 2.0 (the "License"); @n
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import logging
import os
import shutil
import tempfile
from pathlib import Path
import sys
from pmsco.calculators.calculator import AtomicCalculator
from pmsco.calculators.phagen.phagen import libmain
from pmsco.calculators.phagen.translator import Translator
import pmsco.cluster
from pmsco.helpers import stdout_redirected
import pmsco.project
logger = logging.getLogger(__name__)
try:
import phagen
except (ImportError, ModuleNotFoundError) as e:
phagen = None
logger.critical("Error importing the phagen package.", exc_info=e)
class PhagenCalculator(AtomicCalculator):
"""
@@ -37,7 +45,11 @@ class PhagenCalculator(AtomicCalculator):
this produces scatterer, radial matrix element and cluster files for EDAC.
"""
def run(self, params, cluster, scan, output_file):
def run(self,
params: pmsco.project.CalculatorParams,
cluster: pmsco.cluster.Cluster,
scan: pmsco.project.Scan,
output_file: str):
"""
create the input file, run PHAGEN, and translate the output to EDAC format.
@@ -61,7 +73,7 @@ class PhagenCalculator(AtomicCalculator):
because PHAGEN generates a lot of files with hard-coded names,
the function creates a temporary directory for PHAGEN and deletes it before returning.
@param params: pmsco.project.Params object.
@param params: pmsco.project.CalculatorParams object.
the phase_files attribute is updated with the paths of the scattering files.
@param cluster: pmsco.cluster.Cluster object.
@@ -76,6 +88,8 @@ class PhagenCalculator(AtomicCalculator):
@return (None, dict) where dict is a list of output files with their category.
the category is "atomic" for all output files.
"""
assert cluster.get_emitter_count() == 1, "PHAGEN cannot handle more than one emitter at a time"
transl = Translator()
transl.params.set_params(params)
transl.params.set_cluster(cluster)
@@ -83,19 +97,16 @@ class PhagenCalculator(AtomicCalculator):
phagen_cluster = pmsco.cluster.Cluster()
files = {}
prev_wd = os.getcwd()
prev_wd = Path.cwd()
try:
with tempfile.TemporaryDirectory() as temp_dir:
os.chdir(temp_dir)
os.mkdir("div")
os.mkdir("div/wf")
os.mkdir("plot")
os.mkdir("data")
# prepare input for phagen
infile = "phagen.in"
outfile = "phagen.out"
temp_path = Path(temp_dir)
in_path = temp_path / "input"
in_path.mkdir(exist_ok=True)
out_path = temp_path / "output"
out_path.mkdir(exist_ok=True)
infile = in_path / "input.ms"
try:
transl.write_input(infile)
report_infile = os.path.join(prev_wd, output_file + ".phagen.in")
@@ -104,12 +115,22 @@ class PhagenCalculator(AtomicCalculator):
except IOError:
logger.warning("error writing phagen input file {fi}.".format(fi=infile))
# call phagen
libmain(infile, outfile)
report_listfile = os.path.join(prev_wd, output_file + ".phagen.list")
files[report_listfile] = "log"
# call phagen, redirect stdout (unit 6)
os.chdir(out_path)
with open(report_listfile, "wb") as f:
with stdout_redirected(f):
phagen.phagen()
phafile = out_path / "div" / "phases.dat"
radfile = out_path / "fort.55"
# tlfile = out_path / "fort.35"
clufile = out_path / "clus" / "clus.out"
# collect results
try:
phafile = outfile + ".pha"
transl.parse_phagen_phase(phafile)
report_phafile = os.path.join(prev_wd, output_file + ".phagen.pha")
shutil.copy(phafile, report_phafile)
@@ -118,7 +139,6 @@ class PhagenCalculator(AtomicCalculator):
logger.error("error loading phagen phase file {fi}".format(fi=phafile))
try:
radfile = outfile + ".rad"
transl.parse_radial_file(radfile)
report_radfile = os.path.join(prev_wd, output_file + ".phagen.rad")
shutil.copy(radfile, report_radfile)
@@ -127,7 +147,6 @@ class PhagenCalculator(AtomicCalculator):
logger.error("error loading phagen radial file {fi}".format(fi=radfile))
try:
clufile = outfile + ".clu"
phagen_cluster.load_from_file(clufile, pmsco.cluster.FMT_PHAGEN_OUT)
except IOError:
logger.error("error loading phagen cluster file {fi}".format(fi=clufile))
@@ -138,12 +157,13 @@ class PhagenCalculator(AtomicCalculator):
# write edac files
scatfile = output_file + "_{}.scat"
scatfiles = transl.write_edac_scattering(scatfile)
params.phase_files = {c: scatfiles[c] for c in scatfiles}
files.update({scatfiles[c]: "atomic" for c in scatfiles})
params.phase_files = scatfiles.copy()
files.update({f: "atomic" for f in params.phase_files.values()})
rmefile = output_file + ".rme"
transl.write_edac_emission(rmefile)
files[rmefile] = "atomic"
rmefile = output_file + "_{}.rme"
rmefiles = transl.write_edac_emission(rmefile)
params.rme_files = rmefiles.copy()
files.update({f: "atomic" for f in params.rme_files.values()})
cluster.update_atoms(phagen_cluster, {'c'})
clufile = output_file + ".pmsco.clu"

View File

@@ -13,13 +13,12 @@ Licensed under the Apache License, Version 2.0 (the "License"); @n
http://www.apache.org/licenses/LICENSE-2.0
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import logging
import numpy as np
from pmsco.compat import open
from pmsco.cluster import Cluster
logger = logging.getLogger(__name__)
## rydberg energy in electron volts
ERYDBERG = 13.6056923
@@ -58,7 +57,7 @@ class TranslationParams(object):
self.initial_state = "1s"
self.binding_energy = 0.
self.cluster = None
self.kinetic_energies = np.empty(0, dtype=np.float)
self.kinetic_energies = np.empty(0, dtype=float)
@property
def l_init(self):
@@ -72,7 +71,7 @@ class TranslationParams(object):
"""
set the translation parameters.
@param params: a pmsco.project.Params object or
@param params: a pmsco.project.CalculatorParams object or
a dictionary containing some or all public fields of this class.
@return: None
"""
@@ -125,6 +124,44 @@ class Translator(object):
6. call write_edac_scattering to produce the EDAC scattering matrix files.
7. call write_edac_emission to produce the EDAC emission matrix file.
"""
## @var params
#
# project parameters needed for translation.
#
# fill the attributes of this object before using any translator methods.
## @var scattering
#
# t-matrix storage
#
# the t-matrix is stored in a flat, one-dimensional numpy structured array consisting of the following fields:
# @arg e (float) energy (eV)
# @arg a (int) atom index (1-based)
# @arg l (int) angular momentum quantum number l
# @arg t (complex) scattering matrix element, t = exp(-i * delta) * sin delta
#
# @note PHAGEN uses the convention t = exp(-i * delta) * sin delta,
# whereas EDAC uses t = exp(i * delta) * sin delta (complex conjugate).
# this object stores the t-matrix according to the PHAGEN convention.
# the conversion to the EDAC convention occurs in write_edac_scattering_file().
## @var emission
#
# radial matrix element storage
#
# the radial matrix elements are stored in a flat, one-dimensional numpy structured array
# consisting of the following fields:
# @arg e (float) energy (eV)
# @arg dw (complex) matrix element for the transition to l-1
# @arg up (complex) matrix element for the transition to l+1
## @var cluster
#
# cluster object for PHAGEN
#
# this object is created by translate_cluster().
def __init__(self):
"""
initialize the object instance.
@@ -134,18 +171,33 @@ class Translator(object):
self.scattering = np.empty(0, dtype=dt)
dt = [('e', 'f4'), ('dw', 'c16'), ('up', 'c16')]
self.emission = np.empty(0, dtype=dt)
self.cluster = None
def translate_cluster(self):
"""
translate the cluster into a form suitable for PHAGEN.
specifically, move the (first and hopefully only) emitter to the first atom position.
the method copies the cluster from self.params into a new object
and stores it under self.cluster.
@return: None
"""
self.cluster = Cluster()
self.cluster.copy_from(self.params.cluster)
ems = self.cluster.get_emitters(['i'])
self.cluster.move_to_first(idx=ems[0][0]-1)
def write_cluster(self, f):
"""
write the cluster section of the PHAGEN input file.
requires a valid pmsco.cluster.Cluster in self.params.cluster.
@param f: file or output stream (an object with a write method)
@return: None
"""
for atom in self.params.cluster.data:
for atom in self.cluster.data:
d = {k: atom[k] for k in atom.dtype.names}
f.write("{s} {t} {x} {y} {z}\n".format(**d))
f.write("-1 -1 0. 0. 0.\n")
@@ -163,7 +215,7 @@ class Translator(object):
@return: None
"""
data = self.params.cluster.data
data = self.cluster.data
elements = np.unique(data['t'])
for element in elements:
idx = np.where(data['t'] == element)
@@ -181,29 +233,34 @@ class Translator(object):
@return: None
"""
phagen_params = {}
self.translate_cluster()
phagen_params['absorber'] = 1
phagen_params['emin'] = self.params.kinetic_energies.min() / ERYDBERG
phagen_params['emax'] = self.params.kinetic_energies.max() / ERYDBERG
if self.params.kinetic_energies.shape[0] > 1:
phagen_params['delta'] = (phagen_params['emax'] - phagen_params['emin']) / \
(self.params.kinetic_energies.shape[0] - 1)
if phagen_params['delta'] < 0.0001:
else:
phagen_params['delta'] = 0.1
phagen_params['edge'] = state_to_edge(self.params.initial_state) # possibly not used
phagen_params['edge'] = state_to_edge(self.params.initial_state)
phagen_params['edge1'] = 'm4' # auger not supported
phagen_params['edge2'] = 'm4' # auger not supported
phagen_params['cip'] = self.params.binding_energy / ERYDBERG
if phagen_params['cip'] < 0.001:
raise ValueError("binding energy parameter is zero.")
if np.sum(np.abs(self.params.cluster.data['q']) >= 0.001) > 0:
if np.sum(np.abs(self.cluster.data['q'])) > 0.:
phagen_params['ionzst'] = 'ionic'
else:
phagen_params['ionzst'] = 'neutral'
if hasattr(f, "write"):
if hasattr(f, "write") and callable(f.write):
f.write("&job\n")
f.write("calctype='xpd',\n")
f.write("coor='angs',\n")
f.write("cip={cip},\n".format(**phagen_params))
f.write("absorber={absorber},\n".format(**phagen_params))
f.write("edge='{edge}',\n".format(**phagen_params))
f.write("edge1='{edge1}',\n".format(**phagen_params))
f.write("edge2='{edge1}',\n".format(**phagen_params))
@@ -228,7 +285,7 @@ class Translator(object):
self.write_cluster(f)
self.write_ionicity(f)
else:
with open(f, "w") as fi:
with open(f, "wt", encoding="latin1") as fi:
self.write_input(fi)
def parse_phagen_phase(self, f):
@@ -254,13 +311,18 @@ class Translator(object):
@arg l angular momentum quantum number l
@arg t complex scattering matrix element
@note PHAGEN uses the convention t = exp(-i * delta) * sin delta,
whereas EDAC uses t = exp(i * delta) * sin delta (complex conjugate).
this class stores the t-matrix according to the PHAGEN convention.
the conversion to the EDAC convention occurs in write_edac_scattering_file().
@param f: file or path (any file-like or path-like object that can be passed to numpy.genfromtxt).
@return: None
"""
dt = [('e', 'f4'), ('x1', 'f4'), ('x2', 'f4'), ('na', 'i4'), ('nl', 'i4'),
('tr', 'f8'), ('ti', 'f8'), ('ph', 'f4')]
data = np.genfromtxt(f, dtype=dt)
data = np.atleast_1d(np.genfromtxt(f, dtype=dt))
self.scattering = np.resize(self.scattering, data.shape)
scat = self.scattering
@@ -308,7 +370,7 @@ class Translator(object):
@return: None
"""
if hasattr(f, "write"):
if hasattr(f, "write") and callable(f.write):
energies = np.unique(scat['e'])
ne = energies.shape[0]
lmax = scat['l'].max()
@@ -323,12 +385,12 @@ class Translator(object):
if ne > 1:
f.write("{0:.3f} ".format(energy))
for item in energy_scat:
f.write(" {0:.6f} {1:.6f}".format(item['t'].real, item['t'].imag))
f.write(" {0:.6f} {1:.6f}".format(item['t'].real, -item['t'].imag))
for i in range(len(energy_scat), lmax + 1):
f.write(" 0 0")
f.write("\n")
else:
with open(f, "w") as fi:
with open(f, "wt", encoding="latin1") as fi:
self.write_edac_scattering_file(fi, scat)
def write_edac_phase_file(self, f, scat):
@@ -341,7 +403,7 @@ class Translator(object):
@return: None
"""
if hasattr(f, "write"):
if hasattr(f, "write") and callable(f.write):
energies = np.unique(scat['e'])
ne = energies.shape[0]
lmax = scat['l'].max()
@@ -356,31 +418,42 @@ class Translator(object):
if ne > 1:
f.write("{0:.3f} ".format(energy))
for item in energy_scat:
f.write(" {0:.6f}".format(np.angle(item['t'])))
pha = np.sign(item['t'].real) * np.arcsin(np.sqrt(np.abs(item['t'].imag)))
f.write(" {0:.6f}".format(pha))
for i in range(len(energy_scat), lmax + 1):
f.write(" 0")
f.write("\n")
else:
with open(f, "w") as fi:
with open(f, "wt", encoding="latin1") as fi:
self.write_edac_phase_file(fi, scat)
def parse_radial_file(self, f):
"""
parse the radial matrix element output file from phagen.
parse the radial matrix element output file from phagen version 2.2.
the file contains 7 header lines and one data line per requested energy.
the data line contains real and imaginary parts of the matrix elements.
the first four columns contain the electric dipole transitions Rd(li --> li - 1) and Rd(li --> li + 1),
followed by higher orders that we do not use here.
@param f: file or path (any file-like or path-like object that can be passed to numpy.genfromtxt).
@return: None
@raise ValueError if the file is in a wrong format.
"""
dt = [('ar', 'f8'), ('ai', 'f8'), ('br', 'f8'), ('bi', 'f8')]
data = np.genfromtxt(f, dtype=dt)
data = np.atleast_2d(np.genfromtxt(f, skip_header=7))
if data.shape[0] != self.params.kinetic_energies.shape[0] or data.shape[1] < 4:
raise ValueError(f"Unexpected array size of Phagen radial matrix elements output: "
f"expected ({self.params.kinetic_energies.shape[0]}, >= 4), received {data.shape}")
self.emission = np.resize(self.emission, data.shape)
self.emission = np.resize(self.emission, data.shape[0:1])
emission = self.emission
emission['dw'] = data['ar'] + 1j * data['ai']
emission['up'] = data['br'] + 1j * data['bi']
emission['e'] = self.params.kinetic_energies
emission['dw'] = data[:, 0] + 1j * data[:, 1]
emission['up'] = data[:, 2] + 1j * data[:, 3]
def write_edac_emission(self, f):
def write_edac_emission_file(self, f):
"""
write the radial photoemission matrix element in EDAC format.
@@ -390,7 +463,7 @@ class Translator(object):
@return: None
"""
if hasattr(f, "write"):
if hasattr(f, "write") and callable(f.write):
l0 = self.params.l_init
energies = self.params.kinetic_energies
emission = self.emission
@@ -407,5 +480,24 @@ class Translator(object):
f.write(" {0:.6f} {1:.6f}".format(item['dw'].real, item['dw'].imag))
f.write("\n")
else:
with open(f, "w") as of:
self.write_edac_emission(of)
with open(f, "wt", encoding="latin1") as of:
self.write_edac_emission_file(of)
def write_edac_emission(self, filename_format):
"""
write the radial photoemission matrix element in EDAC format.
requires self.scattering, self.emission, self.params.kinetic_energies and self.params.initial_state.
@param filename_format: file name, optionally including a placeholder {} for the atom class.
since phagen calculates only one emitter, the placeholder is not necessary.
@return: dictionary that maps atom classes to file names.
since phagen calculates only one emitter, this dictionary will contain just one entry.
"""
scat = self.scattering
atom = scat['a'][0]
f = filename_format.format(atom)
self.write_edac_emission_file(f)
files = {atom: f}
return files
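
For orientation, a minimal sketch of the translator round trip as it is used by PhagenCalculator.run above; the file names are placeholders, not fixed by the API:

transl = Translator()
transl.params.set_params(params)              # pmsco.project.CalculatorParams
transl.params.set_cluster(cluster)            # pmsco.cluster.Cluster with one emitter
transl.write_input("input.ms")                # PHAGEN input file
# ... run PHAGEN ...
transl.parse_phagen_phase("div/phases.dat")   # t-matrix (PHAGEN convention)
transl.parse_radial_file("fort.55")           # radial dipole matrix elements
scat_files = transl.write_edac_scattering("calc_{}.scat")   # one file per atom class
rme_files = transl.write_edac_emission("calc_{}.rme")       # one file (single emitter)
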

View File

@@ -17,22 +17,20 @@ pip install --user periodictable
@author Matthias Muntwiler
@copyright (c) 2015-19 by Paul Scherrer Institut @n
@copyright (c) 2015-21 by Paul Scherrer Institut @n
Licensed under the Apache License, Version 2.0 (the "License"); @n
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import math
import numpy as np
import periodictable as pt
import sys
import pmsco.config as config
## default file format identifier
FMT_DEFAULT = 0
## MSC file format identifier
@@ -54,14 +52,14 @@ if sys.version_info[0] >= 3:
else:
_SYMBOL_TYPE = 'S2'
## numpy.array datatype of Cluster.data array
DTYPE_CLUSTER_INTERNAL = [('i', 'i4'), ('t', 'i4'), ('s', _SYMBOL_TYPE), ('x', 'f4'), ('y', 'f4'), ('z', 'f4'),
('e', 'u1'), ('q', 'f4'), ('c', 'i4')]
## file format of internal Cluster.data array
## numpy.array datatype of internal Cluster.data array
DTYPE_CLUSTER_INTERNAL = [('i', 'i4'), ('t', 'i4'), ('s', _SYMBOL_TYPE), ('c', 'i4'),
('x', 'f4'), ('y', 'f4'), ('z', 'f4'), ('e', 'u1'), ('q', 'f4')]
## string formatting of native file format
FMT_CLUSTER_INTERNAL = ["%5u", "%2u", "%s", "%5u", "%7.3f", "%7.3f", "%7.3f", "%1u", "%7.3f"]
## field (column) names of internal Cluster.data array
## field (column) names of native file format
FIELDS_CLUSTER_INTERNAL = ['i', 't', 's', 'c', 'x', 'y', 'z', 'e', 'q']
## column names for export
## column names of native file format
NAMES_CLUSTER_INTERNAL = {'i': 'index', 't': 'element', 's': 'symbol', 'c': 'class', 'x': 'x', 'y': 'y', 'z': 'z',
'e': 'emitter', 'q': 'charge'}
@@ -178,12 +176,12 @@ class Cluster(object):
# @arg @c 'i' (int) atom index (1-based)
# @arg @c 't' (int) atom type (chemical element number)
# @arg @c 's' (string) chemical element symbol
# @arg @c 'c' (int) scatterer class
# @arg @c 'x' (float32) x coordinate of the atom position
# @arg @c 'y' (float32) y coordinate of the atom position
# @arg @c 'z' (float32) z coordinate of the atom position
# @arg @c 'e' (uint8) 1 = emitter, 0 = regular atom
# @arg @c 'q' (float32) charge/ionicity
# @arg @c 'c' (int) scatterer class
## @var comment (str)
# one-line comment that can be included in some cluster files
@@ -227,13 +225,13 @@ class Cluster(object):
"""
self.rmax = r
def build_element(self, index, element_number, x, y, z, emitter, charge=0., scatterer=0):
def build_element(self, index, element, x, y, z, emitter, charge=0., scatterer_class=0):
"""
build a tuple in the format of the internal data array.
@param index: (int) index
@param element_number: (int) chemical element number
@param element: chemical element number (int) or symbol (str)
@param x, y, z: (float) atom coordinates in the cluster
@@ -241,17 +239,23 @@ class Cluster(object):
@param charge: (float) ionicity. default = 0
@param scatterer: (int) scatterer class. default = 0.
@param scatterer_class: (int) scatterer class. default = 0.
"""
try:
element_number = int(element)
symbol = pt.elements[element_number].symbol
element = (index, element_number, symbol, x, y, z, int(emitter), charge, scatterer)
except ValueError:
symbol = element
element_number = pt.elements.symbol(symbol.strip()).number
element = (index, element_number, symbol, scatterer_class, x, y, z, int(emitter), charge)
return element
def add_atom(self, atomtype, v_pos, is_emitter=False, charge=0.):
"""
add a single atom to the cluster.
@param atomtype: (int) chemical element number
@param atomtype: chemical element number (int) or symbol (str)
@param v_pos: (numpy.ndarray, shape = (3)) position vector
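
As a hedged illustration of the relaxed element argument (atomic number or symbol), assuming a Cluster instance and numpy imported as np:

clu = Cluster()
clu.add_atom(28, np.array([0., 0., 0.]), is_emitter=True)   # nickel, given by atomic number
clu.add_atom("O", np.array([0., 0., 1.2]))                  # oxygen, given by element symbol
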
@@ -274,7 +278,7 @@ class Cluster(object):
self.rmax (maximum distance from the origin).
all atoms are non-emitters.
@param atomtype: (int) chemical element number
@param atomtype: chemical element number (int) or symbol (str)
@param v_pos: (numpy.ndarray, shape = (3))
position vector of the first atom (basis vector)
@@ -284,17 +288,18 @@ class Cluster(object):
"""
r_great = max(self.rmax, np.linalg.norm(v_pos))
n0 = self.data.shape[0] + 1
n1 = max(int(r_great / np.linalg.norm(v_lat1)) + 1, 3) * 2
n2 = max(int(r_great / np.linalg.norm(v_lat2)) + 1, 3) * 2
nn = 0
buf = np.empty((2 * n1 + 1) * (2 * n2 + 1), dtype=self.dtype)
for i1 in range(-n1, n1 + 1):
for i2 in range(-n2, n2 + 1):
v = v_pos + v_lat1 * i1 + v_lat2 * i2
if np.linalg.norm(v) <= self.rmax:
buf[nn] = self.build_element(nn + n0, atomtype, v[0], v[1], v[2], 0)
nn += 1
buf = np.resize(buf, nn)
n1 = max(int(r_great / np.linalg.norm(v_lat1)) + 1, 4) * 3
n2 = max(int(r_great / np.linalg.norm(v_lat2)) + 1, 4) * 3
idx = np.mgrid[-n1:n1+1, -n2:n2+1]
idx = idx.reshape(idx.shape[0], -1)
lat = np.array([v_lat1, v_lat2])
v = v_pos + np.matmul(idx.T, lat)
rsq = np.sum(np.square(v), axis=-1)
b1 = rsq <= self.rmax**2
sel = b1.nonzero()[0]
buf = np.empty((len(sel)), dtype=self.dtype)
for nn, ii in enumerate(sel):
buf[nn] = self.build_element(nn + n0, atomtype, v[ii, 0], v[ii, 1], v[ii, 2], 0)
self.data = np.append(self.data, buf)
def add_bulk(self, atomtype, v_pos, v_lat1, v_lat2, v_lat3, z_surf=0.0):
@@ -306,7 +311,7 @@ class Cluster(object):
and z_surf (position of the surface).
all atoms are non-emitters.
@param atomtype: (int) chemical element number
@param atomtype: chemical element number (int) or symbol (str)
@param v_pos: (numpy.ndarray, shape = (3))
position vector of the first atom (basis vector)
@@ -322,16 +327,18 @@ class Cluster(object):
n1 = max(int(r_great / np.linalg.norm(v_lat1)) + 1, 4) * 3
n2 = max(int(r_great / np.linalg.norm(v_lat2)) + 1, 4) * 3
n3 = max(int(r_great / np.linalg.norm(v_lat3)) + 1, 4) * 3
nn = 0
buf = np.empty((2 * n1 + 1) * (2 * n2 + 1) * (n3 + 1), dtype=self.dtype)
for i1 in range(-n1, n1 + 1):
for i2 in range(-n2, n2 + 1):
for i3 in range(-n3, n3 + 1):
v = v_pos + v_lat1 * i1 + v_lat2 * i2 + v_lat3 * i3
if np.linalg.norm(v) <= self.rmax and v[2] <= z_surf:
buf[nn] = self.build_element(nn + n0, atomtype, v[0], v[1], v[2], 0)
nn += 1
buf = np.resize(buf, nn)
idx = np.mgrid[-n1:n1+1, -n2:n2+1, -n3:n3+1]
idx = idx.reshape(idx.shape[0], -1)
lat = np.array([v_lat1, v_lat2, v_lat3])
v = v_pos + np.matmul(idx.T, lat)
rsq = np.sum(np.square(v), axis=-1)
b1 = rsq <= self.rmax**2
b2 = v[:, 2] <= z_surf
ba = np.all([b1, b2], axis=0)
sel = ba.nonzero()[0]
buf = np.empty((len(sel)), dtype=self.dtype)
for nn, ii in enumerate(sel):
buf[nn] = self.build_element(nn + n0, atomtype, v[ii, 0], v[ii, 1], v[ii, 2], 0)
self.data = np.append(self.data, buf)
def add_cluster(self, cluster, check_rmax=False, check_unique=False, tol=0.001):
@@ -426,15 +433,47 @@ class Cluster(object):
idx = np.where(b_all)
self.data['z'][idx] += z_shift
return idx
return idx[0]
def translate(self, vector, element=0):
def get_center(self, element=None):
"""
get the geometric center of the cluster or a class of atoms.
@param element: chemical element number (int) or symbol (str)
if atoms of a specific element should be considered only.
by default (element == None or 0 or ""),
all atoms are included in the calculation.
@return: (numpy.ndarray) 3-dimensional vector.
"""
if element:
try:
sel = self.data['t'] == int(element)
except ValueError:
sel = self.data['s'] == element
else:
sel = np.ones_like(self.data['t'])
idx = np.where(sel)
center = np.zeros(3)
center[0] = np.mean(self.data['x'][idx])
center[1] = np.mean(self.data['y'][idx])
center[2] = np.mean(self.data['z'][idx])
return center
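
A short, hedged usage sketch of get_center (the element selection mirrors translate below):

center_all = clu.get_center()      # geometric center of all atoms
center_o = clu.get_center("O")     # geometric center of the oxygen atoms only
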
def translate(self, vector, element=None):
"""
translate the cluster or all atoms of a specified element.
translation shifts each selected atom by the given vector.
@param vector: (numpy.ndarray) 3-dimensional displacement vector.
@param element: (int) chemical element number if atoms of a specific element should be affected.
by default (element = 0), all atoms are moved.
@param element: chemical element number (int) or symbol (str)
if atoms of a specific element should be affected only.
by default (element == None or 0 or ""),
all atoms are translated.
@return: (numpy.ndarray) indices of the atoms that have been shifted.
"""
if element:
@@ -449,7 +488,7 @@ class Cluster(object):
self.data['y'][idx] += vector[1]
self.data['z'][idx] += vector[2]
return idx
return idx[0]
def matrix_transform(self, matrix):
"""
@@ -461,47 +500,49 @@ class Cluster(object):
@return: None
"""
for atom in self.data:
v = np.matrix([atom['x'], atom['y'], atom['z']])
w = matrix * v.transpose()
atom['x'] = float(w[0])
atom['y'] = float(w[1])
atom['z'] = float(w[2])
pos = np.empty((3, self.data.shape[0]), np.float32)
pos[0, :] = self.data['x']
pos[1, :] = self.data['y']
pos[2, :] = self.data['z']
pos = np.matmul(matrix, pos)
self.data['x'] = pos[0, :]
self.data['y'] = pos[1, :]
self.data['z'] = pos[2, :]
def rotate_x(self, angle):
"""
rotate cluster about the surface normal axis
rotate cluster about the x-axis
@param angle (float) in degrees
"""
angle = math.radians(angle)
s = math.sin(angle)
c = math.cos(angle)
matrix = np.matrix([[1, 0, 0], [0, c, -s], [0, s, c]])
matrix = np.array([[1, 0, 0], [0, c, -s], [0, s, c]])
self.matrix_transform(matrix)
def rotate_y(self, angle):
"""
rotate cluster about the surface normal axis
rotate cluster about the y-axis
@param angle (float) in degrees
"""
angle = math.radians(angle)
s = math.sin(angle)
c = math.cos(angle)
matrix = np.matrix([[c, 0, s], [0, 1, 0], [-s, 0, c]])
matrix = np.array([[c, 0, s], [0, 1, 0], [-s, 0, c]])
self.matrix_transform(matrix)
def rotate_z(self, angle):
"""
rotate cluster about the surface normal axis
rotate cluster about the z-axis (surface normal)
@param angle (float) in degrees
"""
angle = math.radians(angle)
s = math.sin(angle)
c = math.cos(angle)
matrix = np.matrix([[c, -s, 0], [s, c, 0], [0, 0, 1]])
matrix = np.array([[c, -s, 0], [s, c, 0], [0, 0, 1]])
self.matrix_transform(matrix)
def find_positions(self, pos, tol=0.001):
@@ -794,6 +835,53 @@ class Cluster(object):
idx = self.data['e'] != 0
return np.sum(idx)
def calc_scattering_angles(self, index_emitter, radius):
"""
calculate forward-scattering angles of the cluster atoms
for each atom within a given radius of the emitter,
the connecting vector between emitter and scatterer is calculated
and returned in cartesian and polar coordinates.
@param index_emitter: atom index of the emitter.
all angles are calculated with respect to this atom.
@param radius: include only atoms within this radius of the emitter.
@note back-scattering angles can be obtained by inverting the angle on the unit sphere:
th' = 180 - th, ph' = ph + 180.
@return dictionary with results.
each item is a numpy.ndarray of shape (N, M)
where N is the number of scatterers
and M = 3 for dict['xyz'] and M = 1 otherwise.
@arg dict['index']: atom index into the cluster array.
@arg dict['xyz']: connecting vector between the emitter and the atom in cartesian coordinates.
@arg dict['dist']: distance between the emitter and the atom.
@arg dict['polar']: polar angle with respect to the z-axis.
@arg dict['azimuth']: azimuthal angle with respect to the x-axis.
"""
# position of emitter atom
em = self.data[index_emitter]
em = np.asarray((em['x'], em['y'], em['z']))
# relative positions of scattering atoms
xyz = self.get_positions()
xyz -= em
dist = np.linalg.norm(xyz, axis=1)
sel1 = dist <= radius
sel2 = dist > 0.
idx = np.where(np.all([sel1, sel2], axis=0))
xyz = xyz[idx]
dist = dist[idx]
# angles
v1 = np.asarray([0, 0, 1])
v2 = np.transpose(xyz / dist.reshape((dist.shape[0], 1)))
th = np.degrees(np.arccos(np.clip(np.dot(v1, v2), -1., 1.)))
ph = np.degrees(np.arctan2(v2[1], v2[0]))
return {'index': idx[0], 'xyz': xyz, 'dist': dist, 'polar': th, 'azimuth': ph}
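
A hedged usage sketch; the emitter index and radius are arbitrary values for illustration:

angles = clu.calc_scattering_angles(index_emitter=0, radius=5.0)
for i, th, ph in zip(angles['index'], angles['polar'], angles['azimuth']):
    print(f"scatterer {i}: polar {th:.1f} deg, azimuth {ph:.1f} deg")
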
def load_from_file(self, f, fmt=FMT_DEFAULT):
"""
load a cluster from a file created by the scattering program.
@@ -848,7 +936,7 @@ class Cluster(object):
else:
raise ValueError("unknown file format {}".format(fmt))
data = np.genfromtxt(f, dtype=dtype, skip_header=sh)
data = np.atleast_1d(np.genfromtxt(f, dtype=dtype, skip_header=sh))
if fmt == FMT_PHAGEN_IN and data['t'][-1] < 1:
data = data[:-1]
@@ -920,7 +1008,7 @@ class Cluster(object):
or left at the default value 0 in which case PMSCO sets the correct values.
if the scattering factors are loaded from existing files,
the atom class corresponds to the key of the pmsco.project.Params.phase_files dictionary.
the atom class corresponds to the key of the pmsco.project.CalculatorParams.phase_files dictionary.
in this case the meaning of the class value is up to the project,
and the class must be set either by the cluster generator
or the project's after_atomic_scattering hook.
@@ -956,7 +1044,7 @@ class Cluster(object):
the other cluster must contain the same atoms (same coordinates) in a possibly random order.
the atoms of this and the other cluster are matched up by sorting them by coordinate.
atomic scattering calculators often change the order of atoms in a cluster based on symmetry,
atomic scattering calculators often change the order of atoms in a cluster based on domain,
and return atom classes versus atomic coordinates.
this method allows to import the atom classes into the original cluster.
@@ -1049,7 +1137,7 @@ class Cluster(object):
np.savetxt(f, data, fmt=file_format, header=header, comments="")
class ClusterGenerator(object):
class ClusterGenerator(config.ConfigurableObject):
"""
cluster generator class.
@@ -1067,13 +1155,14 @@ class ClusterGenerator(object):
@param project: reference to the project object.
cluster generators may need to look up project parameters.
"""
super().__init__()
self.project = project
def count_emitters(self, model, index):
"""
return the number of emitter configurations for a particular model, scan and symmetry.
return the number of emitter configurations for a particular model, scan and domain.
the number of emitter configurations may depend on the model parameters, scan index and symmetry index.
the number of emitter configurations may depend on the model parameters, scan index and domain index.
by default, the method returns 1, which means that there is only one emitter configuration.
emitter configurations are mainly a way to distribute the calculations to multiple processes
@@ -1100,9 +1189,9 @@ class ClusterGenerator(object):
@param index (named tuple CalcID) calculation index.
the method should consider only the following attributes:
@arg @c scan scan index (index into Project.scans)
@arg @c sym symmetry index (index into Project.symmetries)
@arg @c emit emitter index must be -1.
@arg scan scan index (index into Project.scans)
@arg domain domain index (index into Project.domains)
@arg emit emitter index must be -1.
@return number of emitter configurations.
this implementation returns the default value of 1.
@@ -1114,23 +1203,23 @@ class ClusterGenerator(object):
create a Cluster object given the model parameters and calculation index.
the generated cluster will typically depend on the model parameters.
depending on the project, it may also depend on the scan index, symmetry index and emitter index.
depending on the project, it may also depend on the scan index, domain index and emitter index.
the scan index can be used to generate a different cluster for different scan geometry,
e.g., if some atoms can be excluded due to a longer mean free path.
if this is not the case for the specific project, the scan index can be ignored.
the symmetry index may select a particular domain that has a different atomic arrangement.
in this case, depending on the value of index.sym, the function must generate a cluster corresponding
to the particular domain/symmetry.
the method can ignore the symmetry index if the project defines only one symmetry,
or if the symmetry does not correspond to a different atomic structure.
the domain index may select a particular domain that has a different atomic arrangement.
in this case, depending on the value of index.domain, the function must generate a cluster corresponding
to the particular domain.
the method can ignore the domain index if the project defines only one domain,
or if the domain does not correspond to a different atomic structure.
the emitter index selects a particular emitter configuration.
depending on the value of the emitter index, the method must react differently:
1. if the value is -1, return the full cluster and mark all inequivalent emitter atoms.
emitters which are reproduced by a symmetry expansion in combine_emitters() should not be marked.
emitters which are reproduced by a domain expansion in combine_emitters() should not be marked.
the full diffraction scan will be calculated in one calculation.
2. if the value is greater or equal to zero, generate the cluster with the emitter configuration
@@ -1152,9 +1241,9 @@ class ClusterGenerator(object):
@param index (named tuple CalcID) calculation index.
the method should consider only the following attributes:
@arg @c scan scan index (index into Project.scans)
@arg @c sym symmetry index (index into Project.symmetries)
@arg @c emit emitter index.
@arg scan scan index (index into Project.scans)
@arg domain domain index (index into Project.domains)
@arg emit emitter index.
if -1, generate the full cluster and mark all emitters.
if greater or equal to zero, the value is a zero-based index of the emitter configuration.
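
To make the contract concrete, a hedged sketch of a minimal generator; the class, the 'dz' model parameter and the atom choices are illustrative assumptions, not part of PMSCO:

class SingleEmitterGenerator(ClusterGenerator):
    def count_emitters(self, model, index):
        # one emitter configuration, i.e. the whole scan in one calculation
        return 1

    def create_cluster(self, model, index):
        clu = Cluster()
        clu.add_atom(28, np.array([0., 0., 0.]), is_emitter=True)   # emitter at the origin
        clu.add_atom("O", np.array([0., 0., model['dz']]))          # scatterer at model-dependent height
        return clu
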
@@ -1174,7 +1263,7 @@ class LegacyClusterGenerator(ClusterGenerator):
"""
def __init__(self, project):
super(LegacyClusterGenerator, self).__init__(project)
super().__init__(project)
def count_emitters(self, model, index):
"""

View File

@@ -1,40 +0,0 @@
"""
@package pmsco.compat
compatibility code
code bits to provide compatibility for different python versions.
currently supported 2.7 and 3.6.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from io import open as io_open
def open(fname, mode='r', encoding='latin1'):
"""
open a data file for read/write/append using the default str type
this is a drop-in for io.open
where data is exchanged via the built-in str type of python,
whether this is a byte string (python 2) or unicode string (python 3).
the file is assumed to be a latin-1 encoded binary file.
@param fname: file name and path
@param mode: 'r', 'w' or 'a'
@param encoding: 'latin1' (default), 'ascii' or 'utf-8'
@return file handle
"""
if isinstance(b'b', str):
# python 2
mode += 'b'
kwargs = {}
else:
# python 3
mode += 't'
kwargs = {'encoding': encoding}
return io_open(fname, mode, **kwargs)

163
pmsco/config.py Normal file
View File

@@ -0,0 +1,163 @@
"""
@package pmsco.config
infrastructure for configurable objects
@author Matthias Muntwiler
@copyright (c) 2021-23 by Paul Scherrer Institut @n
Licensed under the Apache License, Version 2.0 (the "License"); @n
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
"""
import collections.abc
import inspect
import logging
import os
from pathlib import Path
from string import Template
from typing import Any, Callable, Dict, Generator, Iterable, List, Mapping, Optional, Sequence, Set, Tuple, Union
logger = logging.getLogger(__name__)
PathLike = Union[str, os.PathLike]
DataDict = Mapping[str, Union[str, int, float, Iterable, Mapping]]
def resolve_path(path: PathLike, dirs: Mapping[str, Any]):
"""
Resolve a file path by replacing placeholders.
Placeholders are enclosed in curly braces.
Values for all possible placeholders are provided in a dictionary.
@param path: str, Path or other path-like.
Example: '${work}/test/testfile.dat'.
@param dirs: Dictionary mapping placeholders to project paths.
The paths can be str, Path or other path-like
Example: {'work': '/home/user/work'}
@return: pathlib.Path object
"""
return Path(*(Template(p).substitute(dirs) for p in Path(path).parts))
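
A hedged example of the substitution, following the docstring values:

dirs = {"work": "/home/user/work"}
p = resolve_path("${work}/test/testfile.dat", dirs)
# p == Path("/home/user/work/test/testfile.dat")
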
class ConfigurableObject(object):
"""
Parent class for objects that can be configured from a runfile
The runfile is a JSON file that contains object data in a nested dictionary structure.
In the dictionary structure the keys are property or attribute names of the object to be initialized.
Keys starting with a non-alphabetic character (except for some special keys like __class__) are ignored.
Such keys can serve as comments, or they shield private attributes from being set.
The values can be numeric values, strings, lists or dictionaries.
Simple values are assigned directly using setattr.
This may call a property setter if defined.
Lists are iterated. Each item is appended to the attribute.
The attribute must implement an append method in this case.
If an item is a dictionary and contains the special key '__class__',
an object of that class is instantiated and recursively initialized with the dictionary elements.
This requires that the class can be found in the module scope passed to the parser methods,
and that the class inherits from this class.
Cases that can't be covered easily using this mechanism
should be implemented in a property setter.
Value-checking should also be done in a property setter (or the append method in sequence-like objects).
Attributes
----------
project_symbols: Dictionary of symbols that should be used to resolve class and function names.
This is usually the globals() dictionary of the project module.
"""
def __init__(self):
super().__init__()
self.project_symbols: Optional[Mapping[str, Any]] = None
def set_properties(self, symbols: Optional[Mapping[str, Any]],
data_dict: DataDict,
project: 'ConfigurableObject') -> None:
"""
Set properties from dictionary.
@param symbols: Dictionary of symbols that should be used to resolve class names.
This is usually the globals() dictionary of the project module.
Classes are resolved using the eval function.
@param data_dict: Dictionary of properties to set.
See the class description for details.
@param project: Reference to the project object.
@return: None
"""
self.project_symbols = symbols
for key in data_dict:
if key[0].isalpha():
self.set_property(symbols, key, data_dict[key], project)
def set_property(self, symbols: Optional[Mapping[str, Any]],
key: str,
value: DataDict,
project: 'ConfigurableObject') -> None:
"""
Set one property.
@param symbols: Dictionary of symbols that should be used to resolve class names.
This is usually the globals() dictionary of the project module.
Classes are resolved using the eval function.
@param key: Attribute name to set.
@param value: New value of the attribute.
@param project: Reference to the project object.
@return: None
"""
obj = self.parse_object(symbols, value, project)
if hasattr(self, key):
if obj is not None:
if isinstance(obj, collections.abc.MutableSequence):
attr = getattr(self, key)
for item in obj:
attr.append(item)
elif isinstance(obj, collections.abc.Mapping):
d = getattr(self, key)
if d is not None and isinstance(d, collections.abc.MutableMapping):
d.update(obj)
else:
setattr(self, key, obj)
else:
setattr(self, key, obj)
else:
setattr(self, key, obj)
else:
logger.warning(f"class {self.__class__.__name__} does not have attribute {key}.")
def parse_object(self, symbols: Optional[Mapping[str, Any]],
value: DataDict,
project: 'ConfigurableObject') -> object:
if isinstance(value, collections.abc.MutableMapping) and "__class__" in value:
cn = value["__class__"]
try:
c = eval(cn, symbols)
except (AttributeError, KeyError, NameError, ValueError):
logger.critical(f"can't resolve class name {cn}")
raise
s = inspect.signature(c)
if 'project' in s.parameters:
o = c(project=project)
else:
o = c()
o.set_properties(symbols, value, project)
elif isinstance(value, collections.abc.MutableSequence):
o = [self.parse_object(symbols, i, project) for i in value]
else:
o = value
return o
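
To illustrate the mechanism, a hedged, self-contained sketch; MyProject, MyGenerator and their attributes are assumptions for demonstration and not part of pmsco.config:

class MyGenerator(ConfigurableObject):
    def __init__(self):
        super().__init__()
        self.rmax = 10.0                        # plain attribute set via setattr

class MyProject(ConfigurableObject):
    def __init__(self):
        super().__init__()
        self.output_file = ""
        self.cluster_generator = None           # replaced by a MyGenerator instance below

run_dict = {
    "output_file": "run001",
    "cluster_generator": {"__class__": "MyGenerator", "rmax": 7.5},
    "_comment": "keys starting with a non-alphabetic character are ignored",
}

project = MyProject()
project.set_properties(globals(), run_dict, project)
# project.cluster_generator is now a MyGenerator with rmax == 7.5
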

View File

@@ -1,32 +1,36 @@
"""
@package pmsco.data
import, export, evaluation of msc data.
Import, export, evaluation of msc data.
this module provides common functions for loading/saving and manipulating PED scan data sets.
This module provides common functions for loading/saving and manipulating PED scan data sets.
@author Matthias Muntwiler
@copyright (c) 2015-17 by Paul Scherrer Institut @n
@copyright (c) 2015-23 by Paul Scherrer Institut @n
Licensed under the Apache License, Version 2.0 (the "License"); @n
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import logging
import math
import numpy as np
import numpy.typing as npt
import os
import scipy.special
import scipy.optimize as so
from pmsco.compat import open
import pmsco.loess.loess as loess
from typing import Any, Callable, Dict, Generator, Iterable, List, Mapping, Optional, Sequence, Set, Tuple, Union
import h5py
logger = logging.getLogger(__name__)
try:
import loess
except (ModuleNotFoundError, ImportError) as e:
loess = None
logger.critical("Error importing the loess package.", exc_info=e)
## energy, intensity
DTYPE_EI = [('e', 'f4'), ('i', 'f4')]
## energy, theta, phi, intensity
@@ -43,9 +47,11 @@ DTYPE_TP = [('t', 'f4'), ('p', 'f4')]
DTYPE_TPI = [('t', 'f4'), ('p', 'f4'), ('i', 'f4')]
## theta, phi, intensity, sigma (standard deviation)
DTYPE_TPIS = [('t', 'f4'), ('p', 'f4'), ('i', 'f4'), ('s', 'f4')]
## intensity, theta, phi
DTYPE_ITP = [('i', 'f4'), ('t', 'f4'), ('p', 'f4')]
DTYPES = {'EI': DTYPE_EI, 'ETPI': DTYPE_ETPI, 'ETPIS': DTYPE_ETPIS, 'ETPAI': DTYPE_ETPAI, 'ETPAIS': DTYPE_ETPAIS,
'TP': DTYPE_TP, 'TPI': DTYPE_TPI, 'TPIS': DTYPE_TPIS, }
'TP': DTYPE_TP, 'TPI': DTYPE_TPI, 'TPIS': DTYPE_TPIS, 'ITP': DTYPE_ITP, }
DATATYPES = DTYPES.keys
## supported scan types
@@ -55,8 +61,11 @@ DATATYPES = DTYPES.keys
# @arg @c 'TP' theta - phi (holo scan)
SCANTYPES = ['E', 'EA', 'ET', 'TP']
GenTextFileLike = Union[str, os.PathLike, Iterable[str], int]
OSFileLike = Union[str, os.PathLike, int]
def create_etpi(shape, sigma_column=True):
def create_etpi(shape: Tuple[int], sigma_column: bool = True) -> np.ndarray:
"""
create an ETPI array of a given size.
@@ -64,6 +73,7 @@ def create_etpi(shape, sigma_column=True):
the array is initialized with zeroes.
@param shape (tuple) shape of the array
@param sigma_column: whether the array should include a sigma field (ETPIS type instead of ETPI)
"""
if sigma_column:
data = np.zeros(shape, dtype=DTYPE_ETPIS)
@@ -72,7 +82,7 @@ def create_etpi(shape, sigma_column=True):
return data
def create_data(shape, datatype='', dtype=None):
def create_data(shape: Tuple[int], datatype: str = '', dtype: Optional[npt.DTypeLike] = None) -> np.ndarray:
"""
create a data array of a given size and type.
@@ -90,7 +100,108 @@ def create_data(shape, datatype='', dtype=None):
return data
def load_plt(filename, int_column=-1):
def holo_grid(theta_start: float = 90., theta_step: float = 1., theta_range: float = 90.,
phi_start: float = 0., phi_range: float = 360., phi_refinement: float = 1.):
"""
Generator of a holo grid with constant point density in solid angle.
The generator yields the polar coordinates of a hologram scan in the traditional Osterwalder fashion,
where the grid points are distributed evenly on the hemisphere by varying the azimuthal step size,
while the polar step size is constant.
Each yielded item is a tuple (theta, phi) in degrees.
Theta is the polar, phi the azimuthal coordinate.
@param theta_start Maximum polar angle in degrees, 0..90. Defaults to 90 (grazing emission).
@param theta_step Polar angle step in degrees, 1..90. Defaults to 1.
@param theta_range Polar angle range in degrees, 1..theta_start. Defaults to 90.
@param phi_start Azimuthal start angle in degrees. Defaults to 0.
This azimuth is included at every polar step.
@param phi_range Azimuthal range in degrees. Defaults to 360.
@param phi_refinement Azimuthal refinement/oversampling (scalar). Defaults to 1.
A refinement of 2 yields a factor 2 more grid points in the azimuthal sub-scans.
@return generator yielding tuples (theta, phi) in degrees
"""
deg2rad = 0.01745329
def calc_phi_step(th):
if th < 0.5 or int(phi_range * math.sin(th * deg2rad) * phi_refinement / theta_step) == 0:
phi_st = 0.0
else:
phi_st = phi_range / int(th / theta_start * phi_range / theta_step)
if abs(phi_st) < 0.001:
phi_st = 360.
return phi_st
for theta in np.arange(theta_range, -theta_step, -theta_step):
phi_step = calc_phi_step(theta)
for phi in np.arange(phi_start, phi_range, phi_step):
yield theta, phi
def holo_array(generator: Callable[..., Iterable[Tuple[float, float]]],
generator_args: Dict,
datatype: str = 'TP',
dtype: Optional[npt.DTypeLike] = None) -> np.ndarray:
"""
Create a hologram scan grid in a numpy array.
A holo data array is a numpy structured array containing at least
columns for theta (polar angle) and phi (azimuthal angle).
The theta and phi columns are filled with angles from the holo_grid (or custom generator) function.
The array can contain further columns for energy, intensity, etc. according to the data type specified.
These columns are initialized with zeroes.
@param generator Generator that yields tuples (theta, phi) for each grid point,
given the keyword arguments from generator_args.
Defaults to holo_grid, the traditional Osterwalder holo scan.
@param generator_args Keyword arguments to be passed to the generator.
For arguments of the traditional holo scan, see the documentation of holo_grid.
@param datatype See DATATYPES. Must contain 'T' and 'P' dimensions. Defaults to 'TP'.
@param dtype See DTYPES. Must contain a 't' and 'p' column. Takes precedence over datatype.
Defaults to None (not specified).
"""
if not dtype:
dtype = DTYPES[datatype]
tp = np.fromiter(generator(**generator_args), dtype=DTYPES['TP'])
result = np.zeros(tp.shape, dtype=dtype)
result['t'] = tp['t']
result['p'] = tp['p']
return result
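# illustrative usage sketch (not part of the original module): build a coarse hologram
# grid with 5-degree polar steps and an intensity column; the parameter values are
# arbitrary examples.
#
#   grid = holo_array(holo_grid, {'theta_step': 5., 'phi_refinement': 1.}, datatype='TPI')
#   grid['i'] = 1.0    # fill the intensity column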
def analyse_holoscan_steps(holoscan: np.ndarray) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
"""
Find the polar and azimuthal steps in a holoscan.
@param holoscan: structured numpy array of a hologram scan containing at least the 't' and 'p' columns.
@return: tuple (thetas, dtheta, dphi), where
thetas: unique theta angles, sorted,
dtheta: theta step for each theta,
dphi: phi step for each theta.
"""
thetas, indices, counts = np.unique(holoscan['t'], return_index=True, return_counts=True)
dtheta = np.diff(thetas)
dtheta = np.append(dtheta, dtheta[-1])
adjusted_phis = np.append(holoscan['p'], holoscan['p'][-1])
phis0 = adjusted_phis[indices]
phis1 = adjusted_phis[indices+1]
dphi = phis1 - phis0
phi_range = counts[-1] * dphi[-1]
dphi[counts <= 1] = phi_range
return thetas, dtheta, dphi
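# illustrative usage sketch (not part of the original module): inspect the angular steps
# of a grid created with holo_array above.
#
#   thetas, dtheta, dphi = analyse_holoscan_steps(grid)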
def load_plt(filename: GenTextFileLike, int_column: int = -1) -> np.ndarray:
"""
loads ETPI data from an MSC output (plt) file
@@ -117,12 +228,13 @@ def load_plt(filename, int_column=-1):
data[i]['p'] = phi
data[i]['i'] = selected intensity column
"""
data = np.genfromtxt(filename, usecols=(0, 2, 3, int_column), dtype=DTYPE_ETPI)
data = np.atleast_1d(np.genfromtxt(filename, usecols=(0, 2, 3, int_column), dtype=DTYPE_ETPI))
sort_data(data)
return data
def load_edac_pd(filename, int_column=-1, energy=0.0, theta=0.0, phi=0.0, fixed_cluster=False):
def load_edac_pd(filename: OSFileLike, int_column: int = -1,
energy: float = 0.0, theta: float = 0.0, phi: float = 0.0, fixed_cluster: bool = False) -> np.ndarray:
"""
load ETPI or ETPAI data from an EDAC PD output file.
@@ -157,7 +269,8 @@ def load_edac_pd(filename, int_column=-1, energy=0.0, theta=0.0, phi=0.0, fixed_
data[i]['i'] = selected intensity column
@endverbatim
"""
with open(filename, "r") as f:
with open(filename, "rt", encoding="latin1") as f:
header1 = f.readline().strip()
header2 = f.readline().strip()
if not header1 == '--- scan PD':
@@ -189,7 +302,7 @@ def load_edac_pd(filename, int_column=-1, energy=0.0, theta=0.0, phi=0.0, fixed_
logger.warning("unexpected EDAC output file column name")
break
cols = tuple(cols)
raw = np.genfromtxt(filename, usecols=cols, dtype=dtype, skip_header=2)
raw = np.atleast_1d(np.genfromtxt(filename, usecols=cols, dtype=dtype, skip_header=2))
if fixed_cluster:
etpi = np.empty(raw.shape, dtype=DTYPE_ETPAI)
@@ -218,7 +331,7 @@ def load_edac_pd(filename, int_column=-1, energy=0.0, theta=0.0, phi=0.0, fixed_
return etpi
def load_etpi(filename):
def load_etpi(filename: GenTextFileLike) -> np.ndarray:
"""
loads ETPI or ETPIS data from a text file
@@ -253,7 +366,7 @@ def load_etpi(filename):
return data
def load_data(filename, dtype=None):
def load_data(filename: GenTextFileLike, dtype: Optional[npt.DTypeLike] = None):
"""
load column data (ETPI, and the like) from a text file.
@@ -288,7 +401,7 @@ def load_data(filename, dtype=None):
return data
def format_extension(data):
def format_extension(data: np.ndarray) -> str:
"""
format the file extension based on the contents of an array.
@@ -299,7 +412,7 @@ def format_extension(data):
return "." + "".join(data.dtype.names)
def save_data(filename, data):
def save_data(filename: OSFileLike, data: npt.ArrayLike) -> None:
"""
save column data (ETPI, and the like) to a text file.
@@ -315,7 +428,7 @@ def save_data(filename, data):
np.savetxt(filename, data, fmt='%g')
def sort_data(data):
def sort_data(data: np.ndarray) -> None:
"""
sort scan data (ETPI and the like) in a consistent order.
@@ -338,7 +451,8 @@ def sort_data(data):
data.sort(kind='mergesort', order=sort_key)
def restructure_data(data, dtype=DTYPE_ETPAIS, defaults=None):
def restructure_data(data: np.ndarray, dtype: Optional[npt.DTypeLike] = None,
defaults: Optional[Mapping] = None) -> np.ndarray:
"""
restructure the type of a data array by adding or removing columns.
@@ -361,6 +475,8 @@ def restructure_data(data, dtype=DTYPE_ETPAIS, defaults=None):
@return: re-structured numpy array or
@c data if the new and original data types are the same.
"""
if dtype is None:
dtype = DTYPE_ETPAIS
if data.dtype == dtype:
return data
else:
@@ -378,7 +494,7 @@ def restructure_data(data, dtype=DTYPE_ETPAIS, defaults=None):
return new_data
def common_dtype(scans):
def common_dtype(scans: Iterable[Union[npt.ArrayLike, npt.DTypeLike]]) -> npt.DTypeLike:
"""
determine the common data type for a number of scans.
@@ -409,7 +525,7 @@ def common_dtype(scans):
return dtype
def detect_scan_mode(data):
def detect_scan_mode(data: np.ndarray) -> Tuple[List[str], Dict[str, np.ndarray]]:
"""
detect the scan mode and unique scan positions in a data array.
@@ -495,7 +611,7 @@ def detect_scan_mode(data):
return scan_mode, scan_positions
def filter_tp(data, filter):
def filter_tp(data: np.ndarray, _filter: np.ndarray) -> np.ndarray:
"""
select data points from an ETPI array that match theta and phi coordinates of another ETPI array.
@@ -503,7 +619,7 @@ def filter_tp(data, filter):
@param data ETPI-like structured numpy.ndarray (ETPI, ETPIS, ETPAI, ETPAIS).
@param filter ETPI-like structured numpy.ndarray (ETPI, ETPIS, ETPAI, ETPAIS).
@param _filter ETPI-like structured numpy.ndarray (ETPI, ETPIS, ETPAI, ETPAIS).
only 't' and 'p' columns are used.
@return filtered data (numpy.ndarray)
@@ -512,18 +628,19 @@ def filter_tp(data, filter):
"""
# copy theta,phi into separate structured arrays
data_tp = np.zeros_like(data, dtype=[('t', '<i4'), ('p', '<i4')])
filter_tp = np.zeros_like(filter, dtype=[('t', '<i4'), ('p', '<i4')])
filt_tp = np.zeros_like(_filter, dtype=[('t', '<i4'), ('p', '<i4')])
# multiply by 10, round to integer
data_tp['t'] = np.around(data['t'] * 10.0)
data_tp['p'] = np.around(data['p'] * 10.0)
filter_tp['t'] = np.around(filter['t'] * 10.0)
filter_tp['p'] = np.around(filter['p'] * 10.0)
filt_tp['t'] = np.around(_filter['t'] * 10.0)
filt_tp['p'] = np.around(_filter['p'] * 10.0)
# calculate intersection
idx = np.in1d(data_tp, filter_tp)
idx = np.in1d(data_tp, filt_tp)
result = data[idx]
return result
def interpolate_hemi_scan(rect_tpi, hemi_tpi):
def interpolate_hemi_scan(rect_tpi: np.ndarray, hemi_tpi: np.ndarray) -> np.ndarray:
"""
interpolate a hemispherical scan from a rectangular angle scan.
@@ -555,7 +672,9 @@ def interpolate_hemi_scan(rect_tpi, hemi_tpi):
hemi_tpi['i'][sel_theta] = result
return hemi_tpi
def reshape_2d(flat_data, axis_columns, return_column='i'):
def reshape_2d(flat_data: np.ndarray, axis_columns: Sequence[str], return_column: str = 'i') -> \
Tuple[np.ndarray, np.ndarray, np.ndarray]:
"""
reshape an ETPI-like array into a two-dimensional array according to the scan axes.
@@ -565,6 +684,8 @@ def reshape_2d(flat_data, axis_columns, return_column='i'):
@param axis_columns list of column names that designate the axes
@param return_column: name of field to return in two dimensions
@return the tuple (result_data, axis0, axis1), where
@arg result_data (ndarray) new two-dimensional ndarray of the scan
@arg axis0 (ndarray) scan positions along the first dimension
@@ -579,7 +700,7 @@ def reshape_2d(flat_data, axis_columns, return_column='i'):
return data.copy(), axis0, axis1
def calc_modfunc_mean(data):
def calc_modfunc_mean(data: np.ndarray) -> np.ndarray:
"""
calculates the modulation function using the mean value of data.
this is a simplified calculation method
@@ -615,7 +736,7 @@ def calc_modfunc_mean(data):
return modf
def calc_modfunc_loess(data, smth=0.4):
def calc_modfunc_loess(data: np.ndarray, smth: float = 0.4) -> np.ndarray:
"""
calculate the modulation function using LOESS (locally weighted regression) smoothing.
@@ -669,20 +790,27 @@ def calc_modfunc_loess(data, smth=0.4):
return modf
def rfactor(experiment, theory):
def square_diff_rfactor(experiment: np.ndarray, theory: np.ndarray) -> float:
"""
calculate the R-factor of a calculated modulation function.
Calculate the R-factor from the normalized sum of squared differences.
if the sigma column is present in experiment and non-zero,
If the sigma column is present in experiment and non-zero,
the R-factor terms are weighted by 1/sigma**2.
the input arrays must have the same shape and the coordinate columns must be identical (they are ignored).
the array elements are compared element-by-element.
terms having NaN intensity are ignored.
The input arrays must have the same shape and the coordinate columns must be identical.
The array elements are compared element-by-element.
The values of the coordinate arrays do not influence the result.
Terms having NaN intensity are ignored.
@param experiment: ETPI, ETPIS, ETPAI or ETPAIS array containing the experimental modulation function.
This function can be specified in the Scan.rfactor_func parameter of the project.
@param theory: ETPI or ETPAI array containing the calculated modulation functions.
@param experiment: (numpy structured array)
ETPI, ETPIS, ETPAI or ETPAIS array containing the experimental modulation function.
If an `s` field is present and non-zero,
the R-factor terms are weighted by 1/sigma**2.
@param theory: (numpy structured array)
ETPI or ETPAI array containing the theoretical function.
@return scalar R-factor in the range from 0.0 to 2.0.
@@ -702,7 +830,7 @@ def rfactor(experiment, theory):
return sum1 / sum2
def scaled_rfactor(scale, experiment, weights, theory):
def scaled_rfactor_func(scale: float, experiment: np.ndarray, weights: np.ndarray, theory: np.ndarray) -> float:
"""
calculate the R-factor of a modulation function against the measurement with scaled amplitude.
@@ -732,6 +860,7 @@ def scaled_rfactor(scale, experiment, weights, theory):
@raise ValueError if all experiments and theory values or all weights are zero.
"""
difs = weights * (scale * experiment - theory) ** 2
sums = weights * (scale ** 2 * experiment ** 2 + theory ** 2)
sum1 = difs.sum(dtype=np.float64)
@@ -739,7 +868,7 @@ def scaled_rfactor(scale, experiment, weights, theory):
return sum1 / sum2
def optimize_rfactor(experiment, theory):
def optimize_rfactor(experiment: np.ndarray, theory: np.ndarray) -> float:
"""
calculate the R-factor of a calculated modulation function against the measurement, adjusting their amplitude.
@@ -750,13 +879,15 @@ def optimize_rfactor(experiment, theory):
this is useful if the amplitudes of the two functions do not match due to systematic effects
of the calculation or the measurement.
the optimization is done in a scipy.optimize.least_squares optimization of the scaled_rfactor() function.
the optimization is done in a scipy.optimize.least_squares optimization of the scaled_rfactor_func() function.
the initial guess of the scaling factor is 0.7, the constraining boundaries are 1/10 and 10.
the input arrays must have the same shape and the coordinate columns must be identical (they are ignored).
the array elements are compared element-by-element.
terms having NaN intensity are ignored.
This function can be specified in the Scan.rfactor_func parameter of the project.
@param experiment: ETPI, ETPIS, ETPAI or ETPAIS array containing the experimental modulation function.
@param theory: ETPI or ETPAI array containing the calculated modulation functions.
@@ -773,13 +904,13 @@ def optimize_rfactor(experiment, theory):
else:
wgts = np.ones_like(experiment['i'])
result = so.least_squares(scaled_rfactor, 0.7, bounds=(0.1, 10.0), args=(experiment['i'], wgts, theory['i']))
result_r = scaled_rfactor(result.x, experiment['i'], wgts, theory['i'])
result = so.least_squares(scaled_rfactor_func, 0.7, bounds=(0.1, 10.0), args=(experiment['i'], wgts, theory['i']))
result_r = scaled_rfactor_func(result.x, experiment['i'], wgts, theory['i'])
return result_r
def alpha_average(data):
def alpha_average(data: np.ndarray) -> np.ndarray:
"""
average I(alpha, theta, phi) over alpha.
@@ -809,7 +940,7 @@ def alpha_average(data):
return result
def phi_average(data):
def phi_average(data: np.ndarray) -> np.ndarray:
"""
average I(theta, phi) over phi.
@@ -827,7 +958,7 @@ def phi_average(data):
names = list(data.dtype.names)
names.remove('p')
dtype = [(name, data.dtype[name].str) for name in names]
result = create_data((nt), dtype=dtype)
result = create_data((nt,), dtype=dtype)
for i, t in enumerate(t_axis):
sel = np.abs(scan_positions['t'] - t) < 0.01
@@ -839,7 +970,7 @@ def phi_average(data):
return result
def alpha_mirror_average(data):
def alpha_mirror_average(data: np.ndarray) -> np.ndarray:
"""
calculate the average of I(alpha, theta, phi) and I(-alpha, theta, phi).
@@ -871,3 +1002,14 @@ def alpha_mirror_average(data):
logger.warning('asymmetric alpha scan. skipping alpha mirror average.')
return result1
if loess is not None:
default_modfunc = calc_modfunc_loess
logger.info("pmsco.data.default_modfunc = pmsco.data.calc_modfunc_loess")
else:
default_modfunc = calc_modfunc_mean
logger.warning("pmsco.data.default_modfunc = pmsco.data.calc_modfunc_mean")
default_rfactor = square_diff_rfactor
logger.info("pmsco.data.default_rfactor = pmsco.data.square_diff_rfactor")

File diff suppressed because it is too large

169
pmsco/database/access.py Normal file
View File

@@ -0,0 +1,169 @@
"""
@package pmsco.database.access
wrapper classes for access to a pmsco database
the most important class to be used is DatabaseAccess.
usage:
~~~~~~{.py}
db = DatabaseAccess()
db.connect("file.db")
with db.session() as session:
# database access here
# ...
# commit transaction
session.commit()
# continue in new transaction
# at the end of the context
# the session is closed and orm objects are detached from the database.
~~~~~~
@author Matthias Muntwiler, matthias.muntwiler@psi.ch
@copyright (c) 2016-21 by Paul Scherrer Institut @n
Licensed under the Apache License, Version 2.0 (the "License"); @n
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
"""
import fasteners
import logging
from pathlib import Path
import pmsco.database.orm as orm
logger = logging.getLogger(__name__)
class _DummyLock(object):
"""
dummy lock used for in memory database.
"""
def __enter__(self):
return self
def __exit__(self, exc_type, exc_val, exc_tb):
pass
class LockedSession(object):
"""
database session context manager
this context manager (to be used in a with statement)
acquires a lock on the database lock file
and provides a database session (orm.Session()).
the session is closed (and pending transactions committed) on exit.
if an exception occurs, pending transactions are rolled back before the session is closed.
@note the term _session_ refers to a session in sqlalchemy.
"""
def __init__(self, lock_file=None):
self.lock_file = lock_file
self._session = None
self._lock = None
def __enter__(self):
self._lock = self.lock()
self._lock.__enter__()
self._session = orm.Session()
return self._session
def __exit__(self, exc_type, exc_val, exc_tb):
if exc_type is None:
self._session.close()
else:
self._session.rollback()
self._session.close()
self._lock.__exit__(exc_type, exc_val, exc_tb)
self._lock = None
def lock(self):
"""
create a file-lock context manager for the database.
this is either a fasteners.InterProcessLock object on self.lock_file
or a _DummyLock object if the database is in memory.
InterProcessLock serializes access to the database by means of a lock file.
this is necessary if multiple pmsco instances require access to the same database.
_DummyLock is used with an in-memory database which does not require locking.
the lock object can be used as context-manager in a with statement.
"""
if self.lock_file:
return fasteners.InterProcessLock(self.lock_file)
else:
return _DummyLock()
class DatabaseAccess(object):
"""
basic database connection
this class maintains a database connection and builds session objects.
a _session_ corresponds to an sqlalchemy session, which defines the lifecycle of mapped objects.
a session can open one or multiple (subsequent) transactions.
usage:
~~~~~~{.py}
db = DatabaseAccess()
db.connect("file.db")
with db.session() as session:
# database access
session.commit()
~~~~~~
the session object is a context handler.
it commits the transaction and closes the session at the end of the context.
if an exception occurs, it rolls back the transaction and closes the session before passing the exception.
"""
def __init__(self):
self.db_file = ""
self.lock_file = ""
def connect(self, db_file, lock_file=""):
"""
connect to a new or existing database file.
if the file does not exist, or if it is empty, a new database schema is created.
@param db_file: name of a file or ":memory:" for an in-memory database.
@param lock_file: name of a file that is used to lock the database.
by default, db_file with a suffix of ".lock" is used.
for most uses, the default should be fine.
the argument is provided mainly for testing the locking functionality.
this must be a file that is not used for anything else.
the file does not need to exist.
it's best if the file is in the same directory as the database file.
all clients of a database must use the same lock file.
@return: None
"""
self.db_file = db_file
if lock_file:
self.lock_file = lock_file
elif db_file == ":memory:":
self.lock_file = ""
else:
self.lock_file = Path(str(db_file) + ".lock")
orm.connect(orm.sqlite_link(self.db_file))
def session(self):
"""
open a database session.
this function returns a pmsco.database.access.LockedSession object
which is a context handler that provides an sqlalchemy session
that is locked against concurrent access from other DatabaseAccess instances.
see the class description for an example usage pattern.
@return: pmsco.database.access.LockedSession() object.
"""
return LockedSession(self.lock_file)

329
pmsco/database/common.py Normal file
View File

@@ -0,0 +1,329 @@
"""
@package pmsco.database.common
common database operations
this module gathers a number of common database operations.
all functions require an open session object from pmsco.database.access.DatabaseAccess.
@author Matthias Muntwiler, matthias.muntwiler@psi.ch
@copyright (c) 2016-21 by Paul Scherrer Institut @n
Licensed under the Apache License, Version 2.0 (the "License"); @n
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
"""
import logging
import sqlalchemy
import pmsco.database.orm as orm
logger = logging.getLogger(__name__)
def filter_project(query, project_or_name_or_id):
"""
filter a query by project
@param query: sqlalchemy query object
@param project_or_name_or_id: orm.Project object or project name or project id.
@return: modified query
"""
if isinstance(project_or_name_or_id, orm.Project):
query = query.filter(orm.Project.id == project_or_name_or_id.id)
elif isinstance(project_or_name_or_id, int):
query = query.filter(orm.Project.id == project_or_name_or_id)
else:
query = query.filter(orm.Project.name == project_or_name_or_id)
return query
def filter_job(query, job_or_name_or_id):
"""
filter a query by job
@param query: sqlalchemy query object
@param job_or_name_or_id: orm.Job object or job name or job id.
@return: modified query
"""
if isinstance(job_or_name_or_id, orm.Job):
query = query.filter(orm.Job.id == job_or_name_or_id.id)
elif isinstance(job_or_name_or_id, int):
query = query.filter(orm.Job.id == job_or_name_or_id)
else:
query = query.filter(orm.Job.name == job_or_name_or_id)
return query
def query_params(session, project=None, job=None):
"""
query parameter names and their associated objects from the database
the result is a dictionary of orm.Param objects mapped to their respective keys.
the parameters can be filtered by project and/or job.
if no arguments are given, parameters from all projects are returned.
@note make sure previous changes have been committed. else the query may not find all records.
@param session: (sqlalchemy.Session) database session created by pmsco.database.access.DatabaseAccess.session()
@param project: orm.Project object or project name or project id.
default: don't filter projects.
@param job: orm.Job object or job name or job id.
default: don't filter jobs
@return: dictionary of parameters
"""
query = session.query(orm.Param).join(orm.ParamValue).join(orm.Model).join(orm.Job).join(orm.Project)
if project is not None:
query = filter_project(query, project)
if job is not None:
query = filter_job(query, job)
params = query.all()
params = {param.key: param for param in params}
return params
def query_tags(session, project=None, job=None):
"""
query tag names and their associated objects from the database
the result is a dictionary of orm.Tag objects mapped to their respective keys.
the tags can be filtered by project and/or job.
if no arguments are given, tags from all projects are returned.
@note the orm.Job.tags mapping is an alternative way to access job tags.
@note make sure previous changes have been committed. else the query may not find all records.
@param session: (sqlalchemy.Session) database session created by pmsco.database.access.DatabaseAccess.session()
@param project: orm.Project object or project name or project id.
default: don't filter projects.
@param job: orm.Job object or job name or job id.
default: don't filter jobs
@return: dictionary of tags
"""
query = session.query(orm.Tag).join(orm.JobTag).join(orm.Job).join(orm.Project)
if project is not None:
query = filter_project(query, project)
if job is not None:
query = filter_job(query, job)
tags = query.all()
tags = {tag.key: tag for tag in tags}
return tags
def query_job_tags(session, project=None, job=None):
"""
query tags (keys and values) from the database
the result is a dictionary of tag values (str) mapped to their respective keys (str).
the tags can be filtered by project and/or job.
if no arguments are given, tags from all projects are returned.
@note for one specific job, this is equivalent to the orm.Job.tags mapping.
@note make sure previous changes have been committed. else the query may not find all records.
@param session: (sqlalchemy.Session) database session created by pmsco.database.access.DatabaseAccess.session()
@param project: orm.Project object or project name or project id.
default: don't filter projects.
@param job: orm.Job object or job name or job id.
default: don't filter jobs
@return: tags dictionary {key: value}
"""
query = session.query(orm.JobTag).join(orm.Job).join(orm.Project)
if project is not None:
query = filter_project(query, project)
if job is not None:
query = filter_job(query, job)
job_tags = query.all()
job_tags = {jt.tag.key: jt.value for jt in job_tags}
return job_tags
def register_project(session, name, code, allow_existing=False):
"""
register (insert or query) a project with the database.
a new project record with the given parameters is inserted into the database.
if a project of the same name already exists, the existing record is returned.
@attention the orm.Project.id field is undefined until the session is committed!
it's better to identify a project by name or orm.Project object.
@note make sure previous changes have been committed. else the query may not find an existing project.
@param session: (sqlalchemy.Session) database session created by pmsco.database.access.DatabaseAccess.session()
the session is committed if a new project entry has been added.
@param name: project name. must be unique within the database.
@param code: name of the project module.
@param allow_existing: selects the behaviour if a project record exists in the database:
return the corresponding orm.Project (True) or raise an exception (False, default).
the exception is ValueError.
@return: orm.Project object.
the object can be used and modified as long as the session is active.
note that the id attribute is invalid until the session is committed!
@raise ValueError if the project exists and allow_existing is False.
"""
query = session.query(orm.Project)
query = query.filter(orm.Project.name == name)
project = query.one_or_none()
if project is None:
project = orm.Project(name=name, code=code)
session.add(project)
session.commit()
elif not allow_existing:
raise ValueError(f"project {project.name} exists")
return project
def get_project(session, project_or_name_or_id):
"""
resolve a project by name or id.
this function resolves a project specification to an orm.Project object.
if `project_or_name_or_id` is an orm.Project object, it just returns that object without any checks.
else, the project is looked up in the database.
@attention if `project_or_name_or_id` is an orm.Project object the function returns it without checks!
that means if the object is detached, you cannot use it to query results from the database.
if you need an object that is valid and in sync with the database,
resolve it by name or id!
@param session: (sqlalchemy.Session) database session created by pmsco.database.access.DatabaseAccess.session()
@param project_or_name_or_id: orm.Project object or project name or project id.
@return: orm.Project object
"""
if isinstance(project_or_name_or_id, orm.Project):
project = project_or_name_or_id
elif isinstance(project_or_name_or_id, int):
project = session.query(orm.Project).get(project_or_name_or_id)
else:
query = session.query(orm.Project)
query = query.filter(orm.Project.name == project_or_name_or_id)
project = query.one()
return project
def register_job(session, project, job_name, allow_existing=False, **job_attr):
"""
register (insert or query) a new job with the database.
a new job record with the given parameters is inserted into the database.
if a job of the same name exists within the given project, the existing record is returned
(without modifications!).
@note make sure previous changes have been committed. else the query may not find an existing project.
@param session: (sqlalchemy.Session) database session created by pmsco.database.access.DatabaseAccess.session()
the session is committed if a new job entry has been added.
@param project: orm.Project object or project name or project id.
@param job_name: name of job. unique in the project
@param job_attr: optional attributes of the job.
the keywords correspond to attribute names of the pmsco.database.orm.Job object.
@param allow_existing: selects the behaviour if a job record exists in the database:
return the corresponding orm.Job (True) or raise an exception (False, default).
the exception is ValueError.
@return: orm.Job object.
the object can be used and modified as long as the session is active.
note that the id attribute is invalid until the session is committed!
@raise ValueError if the job exists and allow_existing is False.
"""
project = get_project(session, project)
query = session.query(orm.Job).join(orm.Project)
query = query.filter(orm.Project.name == project.name)
query = query.filter(orm.Job.name == job_name)
job = query.one_or_none()
if job is None:
job = orm.Job()
job.name = job_name
job.project = project
optional_args = {'mode', 'machine', 'git_hash', 'datetime', 'processes', 'hours', 'description'}
for name, value in job_attr.items():
if name in optional_args:
setattr(job, name, value)
session.add(job)
session.commit()
elif not allow_existing:
raise ValueError(f"a job {job_name} exists in project {project.name}")
return job
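# illustrative usage sketch (not part of the original module): register a project and a job;
# names and attributes are hypothetical examples, and 'session' comes from DatabaseAccess.session().
#
#   project = register_project(session, "myproject", "myproject.py", allow_existing=True)
#   job = register_job(session, project, "job001", mode="swarm", machine="ra", description="test run")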
def get_job(session, project_or_name_or_id, job_or_name_or_id):
"""
resolve a job by name or id.
this function resolves any combination of project and job specification to an orm.Job object.
if `job_or_name_or_id` is an orm.Job object, it just returns that object without any checks.
else, the job is looked up in the database.
@attention if `job_or_name_or_id` is an orm.Job object the function returns it without checks!
that means if the object is detached, you cannot query results from the database.
if you need an object that is valid and in sync with the database,
query the job by name or id!
@param session: (sqlalchemy.Session) database session created by pmsco.database.access.DatabaseAccess.session()
@param project_or_name_or_id: orm.Project object or project name or project id.
@param job_or_name_or_id: orm.Job object or job name or job id.
@return: orm.Job object
"""
if isinstance(job_or_name_or_id, orm.Job):
job = job_or_name_or_id
elif isinstance(job_or_name_or_id, int):
job = session.query(orm.Job).get(job_or_name_or_id)
else:
project = get_project(session, project_or_name_or_id)
query = session.query(orm.Job).join(orm.Project)
query = query.filter(orm.Project.name == project.name)
query = query.filter(sqlalchemy.or_(orm.Job.id == job_or_name_or_id,
orm.Job.name == job_or_name_or_id))
job = query.one()
return job
def register_job_tags(session, job, tags):
"""
insert or update key-value tags of a job
this is one of many options to populate the Tag and JobTag tables.
it is not required to use this function.
@param session: (sqlalchemy.Session) database session created by pmsco.database.access.DatabaseAccess.session()
@param job: orm.Job object
@param tags: dictionary of tags
@return: None
"""
for k, v in tags.items():
job.tags[k] = v
if tags:
session.commit()
def register_params(session, params):
"""
register (insert missing) parameter names
add new parameter names to the global list of parameter names.
this is one of many options to populate the Param table.
it is not required to use this function.
this function implies a session flush.
@param session: (sqlalchemy.Session) database session created by pmsco.database.access.DatabaseAccess.session()
the session is committed if new parameters have been added
@param params: sequence of parameter names
param names with leading underscore are ignored.
@return: None
"""
existing_params = query_params(session).keys()
params = [param for param in params if param[0] != '_']
new_params = set(params) - set(existing_params)
for k in new_params:
session.add(orm.Param(key=k))
if new_params:
session.commit()

57
pmsco/database/git.py Normal file
View File

@@ -0,0 +1,57 @@
"""
@package pmsco.database.git
git metadata
this module retrieves the git hash of the running code for job metadata.
this requires that the code is run from a git repository
and that the gitpython package is installed.
gitpython is loaded on demand.
common errors (missing gitpython or invalid repository) are handled.
@author Matthias Muntwiler, matthias.muntwiler@psi.ch
@copyright (c) 2015-21 by Paul Scherrer Institut @n
Licensed under the Apache License, Version 2.0 (the "License"); @n
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
"""
import importlib
def git():
"""
import the git module from GitPython
@return: git module or None if an error occurred
"""
try:
return importlib.import_module('git')
except ImportError:
return None
def get_git_hash(repo_path=None):
"""
get the git commit (hash) of the running code (HEAD)
by default, the function looks for a git repository in the source tree of this module.
if successful, it returns the hash string of the HEAD commit.
@param repo_path: path of a file or directory inside the repository.
defaults to the location of this module.
@return: hexadecimal hash string.
empty string if the file is not in a git repository.
"""
if repo_path is None:
repo_path = __file__
_git = git()
if _git is not None:
try:
repo = _git.Repo(repo_path, search_parent_directories=True)
except _git.exc.InvalidGitRepositoryError:
return ""
else:
return repo.head.commit.hexsha
else:
return ""

406
pmsco/database/ingest.py Normal file
View File

@@ -0,0 +1,406 @@
"""
@package pmsco.database.ingest
ingest existing data such as flat results files (.dat or .tasks.dat) into a database.
the results file is a space-delimited, general text file
such as produced by pmsco.optimizers.population.Population.save_array().
each line contains one result dataset, the columns correspond to the regular and special parameters.
the first row contains the parameter names.
the main function is ingest_job_results().
the other functions require an open database session from pmsco.database.access.DatabaseAccess.session(),
and ingest the metadata and the actual results, respectively.
@author Matthias Muntwiler, matthias.muntwiler@psi.ch
@copyright (c) 2016-21 by Paul Scherrer Institut @n
Licensed under the Apache License, Version 2.0 (the "License"); @n
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
"""
import datetime
import logging
import os
import numpy as np
from pathlib import Path
from pmsco.database.access import DatabaseAccess
import pmsco.database.common as common
import pmsco.database.orm as orm
import pmsco.database.util as util
logger = logging.getLogger(__name__)
def insert_result(session, job, index, result, delta=None):
"""
add or update a calculation result including index and model to the database.
@param session: (sqlalchemy.Session) database session.
when updating an existing model, previous changes must have been committed,
else the model may not be found.
this function does not commit the transaction.
@param job: (orm.Job) job object.
use pmsco.database.common.get_object to retrieve by id or name.
@param index: (pmsco.dispatch.CalcID or dict)
calculation index.
in case of dict, the keys must be the attribute names of CalcID prefixed with an underscore, i.e.,
'_model', '_scan', '_domain', '_emit', '_region'.
extra values in the dictionary are ignored.
undefined indices must be -1.
@param result: (dict) dictionary containing the parameter values and the '_rfac' result.
may also contain the special values '_gen', '_particle', '_timestamp'.
'_gen' and '_particle' are integers and default to None.
'_timestamp' can be numeric (seconds since jan 1, 1970)
or an object that implements a timestamp function like datetime.datetime.
it defaults to the current (local) time.
@param delta: (dict) dictionary containing the delta values.
the keys must correspond to model keys in the result dictionary.
this argument is optional.
@return: (orm.Model, orm.Result) model and result objects
"""
model_obj = store_model(session, job, index, result)
result_obj = store_result_data(session, model_obj, index, result)
store_param_values(session, model_obj, result, delta)
return model_obj, result_obj
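# illustrative usage sketch (not part of the original function): store one result using the
# dict form of the calculation index; parameter names and values are hypothetical.
#
#   index = {'_model': 1, '_scan': -1, '_domain': -1, '_emit': -1, '_region': -1}
#   result = {'dA': 25.6, '_rfac': 0.77}
#   model_obj, result_obj = insert_result(session, job, index, result)
#   session.commit()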
def store_model(session, job, index, result):
"""
add or update the model entry for a calculation result in the database.
the method updates the Models table.
the model is identified by job and index.model.
the result is identified by job and index.
if the model exists in the database, it is updated.
@param session: (sqlalchemy.Session) database session.
when updating an existing model, previous changes must have been committed,
else the model may not be found.
this function does not commit the transaction.
@param job: (orm.Job) job object.
use pmsco.database.common.get_object to retrieve by id or name.
@param index: (pmsco.dispatch.CalcID or dict)
calculation index.
in case of dict, the keys must be the attribute names of CalcID prefixed with an underscore, i.e.,
'_model', '_scan', '_domain', '_emit', '_region'.
extra values in the dictionary are ignored.
undefined indices must be -1.
@param result: (dict) dictionary containing the parameter values and the '_rfac' result.
may also contain the special values '_gen' and '_particle'.
'_gen' and '_particle' default to None if not present.
@return: (orm.Model) updated model object
"""
assert isinstance(job, orm.Job)
model_dict = {'gen': None, 'particle': None}
model_dict.update(util.special_params(result))
try:
model_dict['model'] = index.model
except AttributeError:
model_dict['model'] = index['_model']
q = session.query(orm.Model)
q = q.filter(orm.Model.job == job)
q = q.filter(orm.Model.model == model_dict['model'])
model_obj = q.one_or_none()
if model_obj is None:
model_obj = orm.Model()
model_obj.job = job
model_obj.model = model_dict['model']
session.add(model_obj)
model_obj.gen = model_dict['gen']
model_obj.particle = model_dict['particle']
return model_obj
def store_result_data(session, model_obj, index, result):
"""
add or update a result in the database.
the method updates the Results table.
the model is identified by model_id.
the result is identified by model_id and index.
if the result exists in the database, it is updated.
@param session: (sqlalchemy.Session) database session.
when updating an existing model, previous changes must have been committed,
else the result entry may not be found.
this function does not commit the transaction.
@param model_obj: (orm.Model) model object that is already part of the session.
@param index: (pmsco.dispatch.CalcID or dict)
calculation index.
in case of dict, the keys must be the attribute names of CalcID prefixed with an underscore, i.e.,
'_model', '_scan', '_domain', '_emit', '_region'.
extra values in the dictionary are ignored.
undefined indices must be -1.
@param result: (dict) dictionary containing the parameter values and the '_rfac' result.
may also contain the special values '_gen', '_particle', '_timestamp'.
'_gen' and '_particle' are integers and default to None.
'_timestamp' can be numeric (seconds since jan 1, 1970)
or an object that implements a timestamp function like datetime.datetime.
it defaults to the current (local) time.
@return: (orm.Result) updated Results object.
"""
assert isinstance(model_obj, orm.Model)
result_dict = util.special_params(result)
result_dict.update(util.special_params(index))
q = session.query(orm.Result)
q = q.filter(orm.Result.model == model_obj)
q = q.filter(orm.Result.scan == result_dict['scan'])
q = q.filter(orm.Result.domain == result_dict['domain'])
q = q.filter(orm.Result.emit == result_dict['emit'])
q = q.filter(orm.Result.region == result_dict['region'])
result_obj = q.one_or_none()
if result_obj is None:
result_obj = orm.Result()
result_obj.model = model_obj
result_obj.scan = result_dict['scan']
result_obj.domain = result_dict['domain']
result_obj.emit = result_dict['emit']
result_obj.region = result_dict['region']
session.add(result_obj)
result_obj.rfac = result_dict['rfac']
try:
result_obj.timestamp = result_dict['timestamp'].timestamp()
except KeyError:
result_obj.timestamp = datetime.datetime.now().timestamp()
except AttributeError:
result_obj.timestamp = result_dict['timestamp']
try:
result_obj.secs = result_dict['secs']
except KeyError:
pass
return result_obj
def store_param_values(session, model_obj, result, delta=None):
"""
add or update parameter values of a model in the database.
the method updates the ParamValues table.
@param session: (sqlalchemy.Session) database session.
when updating an existing model, previous changes must have been committed,
else the result entry may not be found.
this function flushes the session at the end.
it does not commit the transaction.
@param model_obj: (orm.Model) model object that is already part of the session.
@param result: (dict) dictionary containing the parameter values.
the parameter names must exist in the Params table.
special values (with a leading underscore) are ignored.
extra parameters may raise a KeyError.
@param delta: (dict) dictionary containing the delta values.
the keys must correspond to model keys in the result dictionary.
this argument is optional.
@return: None
@raise: KeyError if a parameter key is not registered.
"""
assert isinstance(model_obj, orm.Model)
for key in util.regular_params(result).keys():
pv = orm.ParamValue()
pv.model = model_obj
pv.param_key = key
pv.value = result[key]
try:
pv.delta = delta[key]
except (TypeError, KeyError):
pass
session.add(pv)
session.flush()
def ingest_results_file(session, project, job, filename):
"""
import a results file into the database.
this is a sub-method used by ingest().
a job entry with the given id must exist,
but there must be no model entries referencing the job.
it is not possible to update existing models, results or parameter values using this method.
instead, you have to delete the job (which also deletes all dependent entries)
and re-import the results.
@param session: (sqlalchemy.Session) database session created by pmsco.database.access.DatabaseAccess.session()
the session is flushed but not committed at the end of this function.
@param project: orm.Project object or project name or project id.
@param job: orm.Job object or job name or job id.
@param filename: path and name of the results file.
@return: None.
@raise ValueError if the job already has model entries.
"""
job = common.get_job(session, project, job)
assert isinstance(job, orm.Job)
data = np.atleast_1d(np.genfromtxt(filename, names=True))
try:
unique_models, unique_index = np.unique(data['_model'], True)
except ValueError:
unique_models = np.array([0])
unique_index = np.array([0])
unique_data = data[unique_index]
special_params = util.special_params(data.dtype.names)
model_objs = {}
# iterate on models
for _data in unique_data:
try:
_model = _data['_model']
except ValueError:
_model = unique_models[0]
model = orm.Model(job=job, model=_model)
if 'gen' in special_params:
model.gen = _data['_gen']
if 'particle' in special_params:
model.particle = _data['_particle']
session.add(model)
model_objs[_model] = model
for key, value in util.regular_params(_data).items():
model.values[key] = value
session.flush()
# iterate on results
for _data in data:
try:
_model = _data['_model']
except ValueError:
_model = unique_models[0]
result_entry = {'model': None,
'scan': -1,
'domain': -1,
'emit': -1,
'region': -1,
'rfac': None}
result_entry.update(util.special_params(_data))
result_entry['model'] = model_objs[_model]
result = orm.Result()
for key, value in result_entry.items():
setattr(result, key, value)
session.add(result)
session.flush()
def ingest_job_metadata(session, **kwargs):
"""
ingest job metadata
@param session: (sqlalchemy.Session) database session created by pmsco.database.access.DatabaseAccess.session()
the session is flushed but not committed at the end of this function.
@param kwargs: dictionary of function arguments.
the dictionary contains the following values.
all arguments are required unless noted.
@arg 'resultsfile' (required) name of the .tasks.dat results file.
@arg 'project' (required) unique name of the project.
@arg 'code' (optional) name of the project code.
@arg 'job' (required) name of the calculation job. job name must not exist for the project yet.
@arg 'mode' (required) pmsco optimization mode.
@arg 'machine' (optional) name of the machine where the job ran.
@arg 'processes' (optional) number of processes.
@arg 'hours' (optional) run time in hours (wall time).
@arg 'git_hash' (optional) git hash of the code revision.
@arg 'datetime' (datetime.datetime) time stamp (optional).
if not specified, the argument defaults to the time stamp of the results file.
hint: the constructor of a datetime object is
`datetime.datetime(year, month, day, hour, minute, second)`.
@arg 'description' (optional) meaningful description of the calculation job, up to the user.
@arg 'jobtags' (dict, optional) key=value tags to be associated with the job
@return (orm.Project, orm.Job) orm objects of the inserted records.
@raise sqlalchemy.exc.IntegrityError if the job already exists in the database.
"""
if 'datetime' not in kwargs:
rf = Path(kwargs['resultsfile'])
kwargs['datetime'] = datetime.datetime.fromtimestamp(rf.stat().st_mtime)
project = common.register_project(session, kwargs['project'], kwargs['code'])
job = common.register_job(session, project, kwargs['job'], **kwargs)
try:
common.register_job_tags(session, job, kwargs['jobtags'])
except KeyError:
pass
session.flush()
return project, job
def ingest_job_results(**kwargs):
"""
import results from a calculation job.
this function contains all steps necessary to import the results (tasks.dat)
from a calculation job into a database.
it registers the project and job, and imports the results data.
the project may exist in the database, the job must not exist (raises an exception).
arguments can be specified as dict (**d) or in keyword=value form.
@param kwargs: dictionary of function arguments.
the dictionary contains the following values.
all arguments are required unless noted.
@arg 'workdir' (optional) path to the working directory.
the working directory of the operating system is changed.
this is the root for relative paths of the database and results files.
if not specified, the working directory is unchanged.
@arg 'dbfile' (required) name of the database file.
@arg 'project' (required) unique name of the project.
@arg 'code' (optional) name of the project code.
@arg 'job' (required) name of the calculation job. job name must not exist for the project yet.
@arg 'mode' (required) pmsco optimization mode.
@arg 'machine' (optional) name of the machine where the job ran.
@arg 'processes' (optional) number of processes.
@arg 'hours' (optional) run time in hours (wall time).
@arg 'git_hash' (optional) git hash of the code revision.
@arg 'datetime' (datetime.datetime) time stamp (optional).
if not specified, the argument defaults to the time stamp of the results file.
hint: the constructor of a datetime object is
`datetime.datetime(year, month, day, hour, minute, second)`.
@arg 'description' (optional) meaningful description of the calculation job, up to the user.
@arg 'jobtags' (dict, optional) key=value tags to be associated with the job
@arg 'resultsfile' (required) name of the .tasks.dat results file.
@return dict with 'project_id' and 'job_id'.
these are the database ids of the project and job records.
@raise sqlalchemy.exc.IntegrityError if the job already exists in the database.
"""
try:
wd = Path(kwargs['workdir'])
except KeyError:
pass
else:
os.chdir(wd)
dba = DatabaseAccess()
dba.connect(kwargs['dbfile'])
with dba.session() as session:
project, job = ingest_job_metadata(session, **kwargs)
ingest_results_file(session, project, job, kwargs['resultsfile'])
session.commit()
ref = {'project_id': project.id, 'job_id': job.id}
return ref
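# illustrative usage sketch (not part of the original module): import an existing results
# file into a database; all file and job names are hypothetical examples.
#
#   ref = ingest_job_results(dbfile="results.db", project="myproject", code="myproject.py",
#                            job="job001", mode="swarm", resultsfile="job001.tasks.dat")
#   print(ref['project_id'], ref['job_id'])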

746
pmsco/database/orm.py Normal file
View File

@@ -0,0 +1,746 @@
"""
@package pmsco.database.orm
pmsco results database object-relational mapper
this module declares the database schema and object mapping.
the object-relational mapping uses
the [sqlalchemy framework](https://docs.sqlalchemy.org/en/13/orm/tutorial.html).
the database backend is sqlite3.
for examples how to use the database, see the ingest module and the unit tests.
@author Matthias Muntwiler
@copyright (c) 2021 by Paul Scherrer Institut @n
Licensed under the Apache License, Version 2.0 (the "License"); @n
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
"""
import datetime
from sqlalchemy import create_engine
from sqlalchemy import event
from sqlalchemy import Column, Sequence, ForeignKey
from sqlalchemy import Boolean, Integer, Float, String, DateTime
from sqlalchemy.engine import Engine
from sqlalchemy.ext.associationproxy import association_proxy
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import object_session
from sqlalchemy.orm import relationship
from sqlalchemy.orm import sessionmaker
from sqlalchemy.orm import validates
from sqlalchemy.orm.collections import attribute_mapped_collection
from sqlalchemy.orm.exc import NoResultFound
import numpy as np
import sqlite3
from pmsco.dispatch import CalcID
import pmsco.database.util as db_util
# make sure sqlite understands numpy data types
sqlite3.register_adapter(np.float64, float)
sqlite3.register_adapter(np.float32, float)
sqlite3.register_adapter(np.int64, int)
sqlite3.register_adapter(np.int32, int)
Base = declarative_base()
engine = None
Session = sessionmaker()
class Project(Base):
"""
database object representing a project
@note there is an implicit constructor with keyword arguments that correspond to the attributes.
"""
## @var id
# (int, primary key) database id of the project
## @var name
# project name, should be short, must be unique within the database
## @var jobs
# collection of related jobs
#
# defines the relationship between Project and Job objects.
# the instance attribute maps job names (str) to Job objects.
__tablename__ = "Projects"
id = Column(Integer, Sequence('project_id_seq'), primary_key=True)
name = Column(String(50, collation='NOCASE'), nullable=False, unique=True)
code = Column(String(50, collation='NOCASE'))
jobs = relationship('Job', backref='project',
collection_class=attribute_mapped_collection('name'),
cascade="all, delete, delete-orphan", lazy='joined')
def __repr__(self):
return f'Project({repr(self.name), repr(self.code)})'
class Job(Base):
"""
database object representing a calculation job
a job object holds several descriptive values of a calculation job.
it also refers to a project.
tags are key-value pairs that describe the job in standardized terms.
they can provide a consistent classification scheme across jobs and projects.
for example, they can store special project arguments that may be important
to distinguish calculations in different stages or contexts.
the class also defines mapping and proxy objects that simplify the use of tags and models.
explicit creation of Tag and JobTag objects is then not necessary.
@attention after modifying the mapped collections job_tags, tags or models
make sure to call flush() or commit() on the session
before accessing those mappings in other objects
else integrity errors may occur!
"""
## @var id
# (int, primary key) database id of the job
## @var project_id
# (int, foreign key) database id of the related project
## @var name
# job name, should be short, must be unique within a project
## @var mode
# pmsco calculation mode
## @var machine
# name of the computing facility
## @var git_hash
# git hash of the used code if under version control
## @var datetime
# start date and time of the job, ISO format (yyyy-mm-dd hh:mm:ss)
## @var processes
# number of processes
## @var hours
# job run time (wall time) in hours
## @var description
# up to the user
## @var job_tags
# collection of related job tags
#
# defines the relationship between Job and JobTag objects.
# the instance attribute maps tag keys (str) to JobTag objects.
## @var tags
# collection of tags
#
# maps tag keys (str) to tag values (str).
# this is an association proxy of job_tags.
## @var models
# collection of related models
#
# defines the relationship between Job and Model objects.
# the instance attribute maps model numbers to Model objects
__tablename__ = "Jobs"
id = Column(Integer, Sequence('job_id_seq'), primary_key=True)
project_id = Column(Integer, ForeignKey('Projects.id'), index=True)
name = Column(String(50, collation='NOCASE'), nullable=False)
mode = Column(String(20, collation='NOCASE'))
machine = Column(String(50, collation='NOCASE'))
git_hash = Column(String(50, collation='NOCASE'))
datetime = Column(String(50))
processes = Column(Integer)
hours = Column(Float)
description = Column(String(200, collation='NOCASE'))
job_tags = relationship('JobTag', back_populates='job',
collection_class=attribute_mapped_collection('tag_key'),
cascade="all, delete, delete-orphan")
# mapping tag_key -> tag_value
tags = association_proxy('job_tags', 'value', creator=lambda k, v: JobTag(key=k, value=v))
models = relationship('Model', back_populates='job',
collection_class=attribute_mapped_collection('model'),
cascade="all, delete, delete-orphan")
def __repr__(self):
try:
project_name = repr(self.project.name)
except AttributeError:
project_name = None
try:
job_name = repr(self.name)
except AttributeError:
job_name = None
return f'Job({project_name}, {job_name}, {repr(self.mode)})'
class Tag(Base):
"""
database object representing a tag name
"""
## @var id
# (int, primary key) database id of the tag name
## @var key
# tag name/key, should be short, must be unique
## @var tag_jobs
# collection of related JobTag objects
#
# defines the relationship between Tag and JobTag objects.
__tablename__ = "Tags"
id = Column(Integer, Sequence('tag_id_seq'), primary_key=True)
key = Column(String(20, collation='NOCASE'), nullable=False, unique=True)
tag_jobs = relationship('JobTag', back_populates='tag', cascade="all, delete, delete-orphan")
def __init__(self, key):
self.key = key
def __repr__(self):
return f'Tag({repr(self.key)})'
class JobTag(Base):
"""
association object class for job tags
Job - Tag is a many-to-many relationship built using this association class.
by using the dictionary-like Job.tags proxy, explicit creation of association objects can be avoided.
the class applies the
[UniqueObjectValidatedOnPending pattern](https://github.com/sqlalchemy/sqlalchemy/wiki/UniqueObjectValidatedOnPending)
to look up existing tags in the database when a Tag object is needed and only the key is given.
"""
## @var id
# (int, primary key) database id of the job tag
## @var tag_id
# (int, foreign key) database id of the related tag name
## @var job_id
# (int, foreign key) database id of the related job
## @var value
# value (str) of the job tag
## @var tag
# associated Tag object
#
# defines the relationship between JobTag and Tag objects
## @var job
# associated Job object
#
# defines the relationship between JobTag and Job objects
## @var tag_key
# key (name) of the associated Tag object
#
# this is an association proxy that provides direct access to tag.key
# or links to or creates a Tag object behind the scenes.
__tablename__ = "JobTags"
id = Column(Integer, Sequence('jobtag_id_seq'), primary_key=True)
tag_id = Column(Integer, ForeignKey('Tags.id'), index=True)
job_id = Column(Integer, ForeignKey('Jobs.id'), index=True)
value = Column(String(200, collation='NOCASE'))
tag = relationship("Tag", back_populates="tag_jobs")
job = relationship("Job", back_populates="job_tags")
tag_key = association_proxy("tag", "key")
def __init__(self, key=None, value=None):
if key is not None:
self.tag_key = key
self.value = value
@validates("tag")
def _validate_tag(self, key, value):
"""
receive the event that occurs when `jobtag.tag` is set.
if the object is present in a Session, then make sure it's the Tag
object that we looked up from the database.
otherwise, do nothing and we'll fix it later when the object is
put into a Session.
@param key: attribute name, i.e., 'tag'
@param value: the Tag object being assigned
"""
sess = object_session(self)
if sess is not None:
return _setup_tag(sess, value)
else:
return value
@event.listens_for(Session, "transient_to_pending")
def _validate_tag(session, object_):
"""
receive a JobTag object when it gets attached to a Session to correct its unique Tag relationship.
"""
if isinstance(object_, JobTag):
if object_.tag is not None and object_.tag.id is None:
old_tag = object_.tag
new_tag = _setup_tag(session, object_.tag)
if new_tag is not old_tag:
if old_tag in session:
session.expunge(old_tag)
object_.tag = new_tag
def _setup_tag(session, tag_object):
"""
given a Session and a Tag object, return the correct Tag object from the database.
"""
with session.no_autoflush:
try:
return session.query(Tag).filter_by(key=tag_object.key).one()
except NoResultFound:
return tag_object
class Model(Base):
"""
database object representing a model
the object holds the model number (which is unique within the context of a single job only),
the diagnostic generation and particle values, and refers to the job where the model is used.
the class also defines relationship properties that simplify access to referenced objects.
for instance, parameter values can be accessed via the values['param_key'] mapping proxy.
examples:
~~~~~~{.py}
model = Model(model=10, gen=5, particle=2)
model.job = job1_object
model.values['dA'] = 25.6
model.deltas['dA'] = 0.1
pv = ParamValue(value=39.0, delta=-0.3)
model.param_values['dB'] = pv
result = Result(calc_id=calc_id, rfac=0.77)
model.results.append(result)
~~~~~~
@attention after modifying the mapped collections param_values, values or deltas,
make sure to call flush() or commit() on the session
before accessing those mappings in another model,
otherwise integrity errors may occur!
"""
## @var id
# (int, primary key) database id of the model
## @var job_id
# (int, foreign key) database id of the related job
## @var model
# (int) model number as used in the task index of pmsco
#
# @note the model number is not unique in the database as multiple jobs can produce the same task indices.
# the unique id, self.id, is not used in pmsco code.
## @var gen
# (int) generation number assigned by some optimizers. defaults to None.
## @var particle
# (int) particle number assigned by some optimizers. defaults to None.
## @var job
# associated Job
#
# defines the relationship between Model and Job objects.
## @var results
# collection of Result objects
#
# defines the relationship between Model and Result objects.
## @var param_values
# collection of ParamValue objects
#
# defines the relationship between Model and ParamValue objects.
# the instance attribute maps parameter keys to ParamValue objects.
## @var values
# collection of parameter values
#
# this is an association proxy that maps parameter keys to parameter values (ParamValue.value).
# ParamValue objects are accessed and created behind the scenes.
## @var deltas
# collection of delta values
#
# this is an association proxy that maps parameter keys to parameter deltas (ParamValue.delta).
# ParamValue objects are accessed and created behind the scenes.
__tablename__ = "Models"
id = Column(Integer, Sequence('model_id_seq'), primary_key=True)
job_id = Column(Integer, ForeignKey('Jobs.id'), index=True)
model = Column(Integer, index=True)
gen = Column(Integer)
particle = Column(Integer)
job = relationship("Job", back_populates="models")
results = relationship('Result', back_populates='model', cascade="all, delete, delete-orphan")
# mapping param_key -> ParamValue object
param_values = relationship('ParamValue', back_populates='model',
collection_class=attribute_mapped_collection('param_key'),
cascade="all, delete, delete-orphan")
# mapping param_key -> param_value
values = association_proxy('param_values', 'value', creator=lambda k, v: ParamValue(key=k, value=v))
deltas = association_proxy('param_values', 'delta', creator=lambda k, v: ParamValue(key=k, delta=v))
def __repr__(self):
return f'Model(id={repr(self.id)}, job_id={repr(self.job_id)}, model={repr(self.model)})'
def as_dict(self):
"""
object properties in a dictionary.
the dictionary keys correspond to the column names of numpy arrays.
the mapping db_field -> column name is declared in pmsco.database.util.DB_SPECIAL_PARAMS
@return: (dict)
"""
d = {'_db_model_id': self.id}
for attr, key in db_util.DB_SPECIAL_PARAMS.items():
try:
d[key] = getattr(self, attr)
except AttributeError:
pass
return d
class Result(Base):
"""
database object representing a calculation result
the result object holds the calculated R-factor per job and calculation index.
the calculation index (CalcID) is not unique in the database because the database may contain results from multiple jobs.
thus, the object links to a Model object which is unique.
the calc_id property can be used to reconstruct a CalcID.
"""
## @var id
# (int, primary key) database id of the result
## @var model_id
# (int, foreign key) database id of the related model
## @var model
# associated Model object
#
# defines the relationship between Result and Model objects.
#
# @attention do not confuse the Result.model and Model.model attributes of the same name!
# to obtain the model number to which a result belongs, use Result.model.model.
## @var scan
# (int) scan index as used in the calculations
## @var domain
# (int) domain index as used in the calculations
## @var emit
# (int) emitter index as used in the calculations
## @var region
# (int) region index as used in the calculations
## @var rfac
# (float) calculated R-factor
## @var timestamp
# (float) end date and time of this calculation task
#
# the float value represents seconds since jan 1, 1970 (datetime.datetime.timestamp).
# the datetime proxy converts to and from python datetime.datetime.
## @var datetime
# (datetime.datetime) end date and time of this calculation task
#
# this is a conversion proxy for timestamp.
## @var secs
# (float) total duration of the calculation task in seconds
#
# total cpu time necessary to get this result (including child tasks) in seconds.
## @var calc_id
# (CalcID) calculation task index
#
# conversion proxy for the task index components.
#
# on assignment, the scan, domain, emit and region attributes are updated.
# it does not update the model index as it is not stored by this object!
# the model index must be set separately in the linked Model object.
__tablename__ = "Results"
id = Column(Integer, Sequence('result_id_seq'), primary_key=True)
model_id = Column(Integer, ForeignKey('Models.id'), index=True)
scan = Column(Integer, index=True)
domain = Column(Integer, index=True)
emit = Column(Integer, index=True)
region = Column(Integer, index=True)
rfac = Column(Float)
timestamp = Column(Float)
secs = Column(Float)
model = relationship("Model", back_populates="results")
def __init__(self, calc_id=None, scan=None, domain=None, emit=None, region=None,
rfac=None, timestamp=None, secs=None):
if calc_id is not None:
self.calc_id = calc_id
else:
self.scan = scan
self.domain = domain
self.emit = emit
self.region = region
self.rfac = rfac
self.timestamp = timestamp
self.secs = secs
def __repr__(self):
return f'Result(model_id={repr(self.model_id)}, calc_id={repr(self.calc_id)}, rfac={repr(self.rfac)})'
@property
def calc_id(self):
return CalcID(self.model.model, self.scan, self.domain, self.emit, self.region)
@calc_id.setter
def calc_id(self, calc_id):
self.scan = calc_id.scan
self.domain = calc_id.domain
self.emit = calc_id.emit
self.region = calc_id.region
@property
def datetime(self):
return datetime.datetime.fromtimestamp(self.timestamp)
@datetime.setter
def datetime(self, value):
self.timestamp = value.timestamp()
def as_dict(self):
"""
object properties in a dictionary.
the dictionary keys correspond to the column names of numpy arrays.
the mapping db_field -> column name is declared in pmsco.database.util.DB_SPECIAL_PARAMS
@return: (dict)
"""
d = {'_db_result_id': self.id}
for attr, key in db_util.DB_SPECIAL_PARAMS.items():
try:
d[key] = getattr(self, attr)
except AttributeError:
pass
return d
class Param(Base):
"""
database object representing a parameter
the parameter object holds the name (or key) of a calculation parameter.
explicit creation of parameter objects can be avoided by using the mappings of the Model class.
"""
## @var id
# (int, primary key) database id of the parameter name
## @var key
# parameter name/key as used in calculations, should be very short, must be unique
## @var param_values
# collection of related ParamValue objects
#
# defines the relationship between Param and ParamValue objects.
__tablename__ = "Params"
id = Column(Integer, Sequence('param_id_seq'), primary_key=True)
key = Column(String(20, collation='NOCASE'), nullable=False, unique=True)
param_values = relationship('ParamValue', back_populates='param', cascade="all, delete, delete-orphan")
def __init__(self, key):
self.key = key
def __repr__(self):
return f'Param({repr(self.key)})'
class ParamValue(Base):
"""
association object class for parameter values
Model - Param is a many-to-many relationship built using this association class.
by using the dictionary-like Model.values and Model.deltas proxies,
explicit creation of association objects can be avoided.
the class applies the
[UniqueObjectValidatedOnPending pattern](https://github.com/sqlalchemy/sqlalchemy/wiki/UniqueObjectValidatedOnPending)
to look up existing params in the database when a Param object is needed and only the key is given.
"""
## @var id
# (int, primary key) database id of the parameter value
## @var param_id
# (int, foreign key) database id of the related parameter name
## @var model_id
# (int, foreign key) database id of the related model
## @var value
# (float) numeric value of the parameter
## @var delta
# (float) numeric delta value of the parameter (reported by some optimizers)
## @var param
# associated Param object
#
# defines the relationship between ParamValue and Param objects
## @var model
# associated Model object
#
# defines the relationship between ParamValue and Model objects
## @var param_key
# key (name) of the associated Param object
#
# this is an association proxy that provides direct access to param.key.
# it accesses or creates Param objects behind the scenes.
__tablename__ = "ParamValues"
id = Column(Integer, Sequence('paramvalue_id_seq'), primary_key=True)
param_id = Column(Integer, ForeignKey('Params.id'), index=True)
model_id = Column(Integer, ForeignKey('Models.id'), index=True)
value = Column(Float)
delta = Column(Float)
param = relationship("Param", back_populates="param_values")
model = relationship("Model", back_populates="param_values")
param_key = association_proxy('param', 'key')
def __init__(self, model=None, param=None, key=None, value=None, delta=None):
if model is not None:
self.model = model
if param is not None:
self.param = param
elif key is not None:
self.param_key = key
self.value = value
self.delta = delta
@validates("param")
def _validate_param(self, key, value):
"""
receive the event that occurs when `paramvalue.param` is set.
if the object is present in a Session, then make sure it's the Param
object that we looked up from the database.
otherwise, do nothing and we'll fix it later when the object is put into a Session.
"""
sess = object_session(self)
if sess is not None:
return _setup_param(sess, value)
else:
return value
@event.listens_for(Session, "transient_to_pending")
def _validate_param(session, object_):
"""
receive a ParamValue object when it gets attached to a Session to correct its unique Param relationship.
"""
if isinstance(object_, ParamValue):
if object_.param is not None and object_.param.id is None:
old_param = object_.param
new_param = _setup_param(session, object_.param)
if new_param is not old_param:
if old_param in session:
session.expunge(old_param)
object_.param = new_param
def _setup_param(session, param_object):
"""
given a Session and a Param object, return the correct Param object from the database.
"""
with session.no_autoflush:
try:
return session.query(Param).filter_by(key=param_object.key).one()
except NoResultFound:
return param_object
@event.listens_for(Engine, "connect")
def set_sqlite_pragma(dbapi_connection, connection_record):
"""
set sqlite pragmas.
make sure sqlite enforces relational integrity.
@param dbapi_connection:
@param connection_record:
@return:
"""
cursor = dbapi_connection.cursor()
cursor.execute("PRAGMA foreign_keys=ON")
cursor.close()
def sqlite_link(path=None):
"""
format the sqlalchemy link to an sqlite3 database.
@param path: file path. if empty, an in-memory database is created.
@return: (str) database link for the sqlalchemy engine.
"""
if not path:
path = ':memory:'
return f'sqlite:///{path}'
def connect(db_link):
"""
connect to the database.
create the sqlalchemy engine and bind the session maker.
the database engine and session maker are global.
this function should be called only once in a process.
@param db_link: (str) database link expected by the sqlalchemy engine
@return: None
"""
global engine
engine = create_engine(db_link, echo=False)
Base.metadata.create_all(engine)
Session.configure(bind=engine)
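As a usage sketch (not part of the module): connect to an in-memory database and store a model through the association proxies; the parameter key 'dA' and its value are invented.
~~~~~~{.py}
import pmsco.database.orm as orm

orm.connect(orm.sqlite_link())      # in-memory database; the foreign_keys pragma is set on connect
session = orm.Session()             # Session is the module-level session maker

model = orm.Model(model=0)
model.values['dA'] = 25.6           # creates Param('dA') and a ParamValue behind the scenes
session.add(model)
session.commit()                    # flush/commit before reading the proxies from another model

print(session.query(orm.ParamValue).count())   # -> 1
~~~~~~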

158
pmsco/database/project.py Normal file
View File

@@ -0,0 +1,158 @@
"""
@package pmsco.database.project
wrapper class for project-specific database operations
usage:
~~~~~~{.py}
db = DatabaseAccess()
db.connect("file.db")
with db.session() as session:
# database access here
# ...
# commit transaction
session.commit()
# continue in new transaction
# ...
# at the end of the context
# the session is closed and orm objects are detached from the database.
~~~~~~
@author Matthias Muntwiler, matthias.muntwiler@psi.ch
@copyright (c) 2016-21 by Paul Scherrer Institut @n
Licensed under the Apache License, Version 2.0 (the "License"); @n
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
"""
import datetime
import logging
import socket
from pmsco.database.access import DatabaseAccess
import pmsco.database.common as db_common
import pmsco.database.ingest as db_ingest
import pmsco.database.query as db_query
from pmsco.dispatch import mpi_size
logger = logging.getLogger(__name__)
class ProjectDatabase(DatabaseAccess):
"""
wrapper class for project specific database operations
the purpose of this class is to bundle all specific code and run-time information
for database access of a running calculation job.
after calling ingest_project_metadata(),
the class object stores the persistent project and job identifiers.
the other methods provide convenient wrappers so that database code can be kept minimal in the project.
usage:
~~~~~~{.py}
db = ProjectDatabase()
db.connect('file.db')
db.ingest_project_metadata(...)
for result in results:
db.ingest_result(result...)
~~~~~~
"""
def __init__(self):
super().__init__()
self.db_project_id = None
self.db_job_id = None
def ingest_project_metadata(self, project):
"""
ingest project metadata into the database
@param project: pmsco.project.Project object
@return: None
"""
with self.session() as session:
db_project = db_common.register_project(session=session,
name=project.project_name,
code=project.__module__,
allow_existing=True)
db_job = db_common.register_job(session=session,
project=db_project,
job_name=project.job_name,
allow_existing=False,
mode=project.mode,
machine=socket.gethostname(),
git_hash=project.git_hash,
datetime=datetime.datetime.now(),
processes=mpi_size,
hours=project.timedelta_limit.total_seconds() / 3600.,
description=project.description)
db_common.register_job_tags(session, db_job, project.job_tags)
db_common.register_params(session, project.model_space.start.keys())
session.commit()
self.db_project_id = db_project.id
self.db_job_id = db_job.id
def ingest_result(self, index, result, delta):
"""
add or update a result in the database.
the method updates the Models, Results and ParamValues tables.
the model is identified by self.db_job_id and index.model.
the result is identified by self.db_job_id and index.
if the model or result exists in the database, it is updated.
@param index: (pmsco.dispatch.CalcID or dict)
calculation index.
in case of dict, the keys must be the attribute names of CalcID prefixed with an underscore, i.e.,
'_model', '_scan', '_domain', '_emit', '_region'.
extra values in the dictionary are ignored.
undefined indices must be -1.
@param result: (dict) dictionary containing the parameter values and the '_rfac' result.
may also contain the special values '_gen', '_particle', '_timestamp'.
'_gen' and '_particle' are integers and default to None.
'_timestamp' can be numeric (seconds since jan 1, 1970)
or an object that implements a timestamp function like datetime.datetime.
it defaults to the current (local) time.
@param delta: (dict) dictionary containing the delta values.
the keys must correspond to model keys in the result dictionary.
this argument is optional.
"""
assert self.db_project_id is not None
assert self.db_job_id is not None
with self.session() as session:
job_obj = db_common.get_job(session, self.db_project_id, self.db_job_id)
model_obj = db_ingest.store_model(session, job_obj, index, result)
db_ingest.store_result_data(session, model_obj, index, result)
db_ingest.store_param_values(session, model_obj, result, delta)
session.commit()
def query_best_task_models(self, level, count):
"""
query N best models per task.
this is a wrapper for pmsco.database.query.query_best_task_models().
in addition to the wrapped function, it opens a session and uses the registered db_job_id.
this query is used by the file tracker to determine the models to keep.
@param level: level up to which to query.
the level can be specified by level name (str) or numeric index (0..4).
if it is scan (equivalent to 1), the method queries the model and scan levels.
@param count: number of models to query per task.
@return set of matching model numbers (model index, Models.model field).
"""
with self.session() as session:
models = db_query.query_best_task_models(session, self.db_job_id, level, count)
return models
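A hypothetical end-to-end sketch (assuming a configured pmsco.project.Project instance named `project`; the parameter names and values are invented):
~~~~~~{.py}
from pmsco.database.project import ProjectDatabase
from pmsco.dispatch import CalcID

db = ProjectDatabase()
db.connect('file.db')
db.ingest_project_metadata(project)

index = CalcID(model=10, scan=-1, domain=-1, emit=-1, region=-1)   # combined result of model 10
result = {'dA': 25.6, 'dB': 39.0, '_rfac': 0.42, '_gen': 5, '_particle': 2}
delta = {'dA': 0.1, 'dB': -0.3}
db.ingest_result(index, result, delta)
~~~~~~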

470
pmsco/database/query.py Normal file
View File

@@ -0,0 +1,470 @@
"""
@package pmsco.database.query
specialized query functions for the pmsco database
@author Matthias Muntwiler, matthias.muntwiler@psi.ch
@copyright (c) 2016-21 by Paul Scherrer Institut @n
Licensed under the Apache License, Version 2.0 (the "License"); @n
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
"""
import logging
import numpy as np
from sqlalchemy import func
import pmsco.database.orm as orm
import pmsco.database.util as util
import pmsco.dispatch as dispatch
logger = logging.getLogger(__name__)
def query_newest_job(session):
"""
retrieve the entry of the newest job
the newest entry is determined by the datetime field.
@param session:
@return: pmsco.database.orm.Job object
"""
q = session.query(orm.Job)
q = q.order_by(orm.Job.datetime.desc(), orm.Job.id.desc())
job = q.first()
return job
def query_model(session, job_id=None, model_id=None, model=None):
"""
retrieve model parameters and control variables from the database.
@param model_id: id of the model in the database.
@return: (dict, dict) value dictionary and delta dictionary.
dictionary keys are parameter values.
the special value '_model' is included.
"""
query = session.query(orm.ParamValue).join(orm.Model)
if job_id is not None:
query = query.filter(orm.Model.job_id == job_id)
if model_id is not None:
query = query.filter(orm.Model.id == model_id)
if model is not None:
query = query.filter(orm.Model.model == model)
result = query.all()
param_value = {}
param_delta = {}
model_obj = None
for pv in result:
if model_obj is None:
model_obj = pv.model
param_value[pv.param.key] = pv.value
param_delta[pv.param.key] = pv.delta
param_value['_model_id'] = model_obj.id
param_value['_model'] = model_obj.model
param_value['_gen'] = model_obj.gen
param_value['_particle'] = model_obj.particle
param_delta['_model_id'] = model_obj.id
param_delta['_model'] = model_obj.model
param_delta['_gen'] = model_obj.gen
param_delta['_particle'] = model_obj.particle
return param_value, param_delta
def query_results(session, job_id):
"""
query all results of a calculation job.
@param job_id: (int) id of the associated Jobs entry.
@return: list of pmsco.database.orm.Result objects.
"""
query = session.query(orm.Result)
query = query.join(orm.Model)
query = query.filter(orm.Model.job_id == job_id)
return query.all()
def query_tasks(session, job_id):
"""
query the task index used in a calculation job.
this query neglects the model index
and returns the unique tuples (-1, scan, domain, emit, region).
@param job_id: (int) id of the associated Jobs entry.
@return list of pmsco.dispatch.CalcID tuples of task indices.
the model attribute is -1 in all elements.
"""
query = session.query(orm.Result.scan, orm.Result.domain, orm.Result.emit, orm.Result.region)
query = query.join(orm.Model)
query = query.filter(orm.Model.job_id == job_id)
query = query.distinct()
query = query.order_by(orm.Result.scan, orm.Result.domain, orm.Result.emit, orm.Result.region)
results = query.all()
output = []
for row in results:
d = row._asdict()
d['model'] = -1
output.append(dispatch.CalcID(**d))
return output
def query_best_task_models(session, job_id, level, count):
"""
query N best models per task.
this query is used by the file tracker to determine the models to keep.
@param job_id: (int) id of the associated Jobs entry.
@param level: level up to which to query.
the level can be specified by level name (str) or numeric index (0..4).
if it is scan (equivalent to 1), the method queries the model and scan levels.
@param count: number of models to query per task.
@return set of matching model numbers (Models.model field).
"""
try:
level = int(level)
except ValueError:
level = dispatch.CALC_LEVELS.index(level)
assert 0 <= level < len(dispatch.CALC_LEVELS)
def _query_models(t):
query = session.query(orm.Model.model).join(orm.Job).join(orm.Result)
query = query.filter(orm.Job.id == job_id)
query = query.filter(orm.Result.scan == t.scan)
query = query.filter(orm.Result.domain == t.domain)
query = query.filter(orm.Result.emit == t.emit)
query = query.filter(orm.Result.region == t.region)
query = query.order_by(orm.Result.rfac)
results = query[0:count]
return set((row.model for row in results))
tasks = query_tasks(session, job_id)
models = set()
for task in tasks:
if task.numeric_level <= level:
q_models = _query_models(task)
models |= q_models
return models
def query_model_params_array(session, jobs=None, models=None, order=None, limit=None):
"""
query parameter values and return them in a numpy array
the models table can be filtered by job and/or model.
else, the whole database is returned (which might be huge!).
@param session:
@param jobs: filter by job.
the argument can be a singleton or sequence of orm.Job objects or numeric id.
@param models: filter by model.
the argument can be a singleton or sequence of orm.Model objects or their id.
@param order: ordering of results. this can be a sequence of orm.Model attributes.
the default order is by job_id and model.
@param limit: maximum number of models to return
@return: dict['values']: numpy values array, dict['deltas']: numpy deltas array
"""
count_query = session.query(orm.Model)
pn_query = session.query(orm.Param.key)
pv_query = session.query(orm.ParamValue)
if jobs:
try:
jobs = [int(jobs)]
except TypeError:
pass
job_ids = [j if isinstance(j, int) else j.id for j in jobs]
count_query = count_query.filter(orm.Model.job_id.in_(job_ids))
pn_query = pn_query.filter(orm.Model.job_id.in_(job_ids))
pv_query = pv_query.filter(orm.Model.job_id.in_(job_ids))
if models:
try:
models = [int(models)]
except TypeError:
pass
model_ids = [m if isinstance(m, int) else m.id for m in models]
count_query = count_query.filter(orm.ParamValue.model_id.in_(model_ids))
pn_query = pn_query.filter(orm.ParamValue.model_id.in_(model_ids))
pv_query = pv_query.filter(orm.ParamValue.model_id.in_(model_ids))
if order is not None:
pv_query = pv_query.order_by(*order)
else:
pv_query = pv_query.order_by(orm.Model.job_id, orm.Model.model)
if limit:
pv_query = pv_query[0:limit]
n_models = count_query.count()
param_names = [row.key for row in pn_query.all()]
param_values = pv_query.all()
special_names = list(orm.Model().as_dict().keys())
dt_names = special_names + param_names
dt = np.dtype([(n, util.field_to_numpy_type(n)) for n in sorted(dt_names, key=str.lower)])
values = np.zeros((n_models,), dtype=dt)
deltas = np.zeros((n_models,), dtype=dt)
for i, pv in enumerate(param_values):
for k, v in pv.model.as_dict().items():
values[i][k] = deltas[i][k] = v
values[i][pv.param_key] = pv.value
deltas[i][pv.param_key] = pv.delta
return {'values': values, 'deltas': deltas}
calc_id_props = {'model': orm.Model.model,
'scan': orm.Result.scan,
'domain': orm.Result.domain,
'emit': orm.Result.emit,
'region': orm.Result.region}
def query_model_results_array(session, jobs=None, models=None, order=None, limit=None,
query_hook=None, hook_data=None, include_params=False, **index):
"""
query a results table with flexible filtering options
the function returns structured numpy arrays of the result values and deltas and, optionally, the parameter values of each model.
the database is fully flattened, i.e. each row of the array represents one result.
the jobs and models arguments filter for specific jobs and/or models.
custom filters can be added in a query hook function.
the hook function receives an sqlalchemy Query object of the Result table,
joined with the Model and Job tables.
other joins must be added explicitly.
the hook function can add more filters and return the modified query.
the hook function is called after the filters from the other function arguments
(job, models, index) have been applied,
and before the ordering and limit are applied.
@param session:
@param jobs: filter by job.
the argument can be a singleton or sequence of orm.Job objects or numeric id.
@param models: filter by model.
the argument can be a singleton or sequence of orm.Model objects or their id.
@param order: ordering of results. this can be a sequence of orm.Result attributes.
the default order is by `orm.Result.rfac`.
to override the default ascending order, append a modifier, e.g., `orm.Result.rfac.desc()`.
@param limit: maximum number of models to return
@param query_hook: hook function that modifies an sqlalchemy.orm.Query object.
the function receives the query as first argument, and any data from hook_data as keyword arguments.
it must return the modified query object.
@param hook_data: (dict) keyword arguments to be passed to the query_hook function.
@param include_params: include parameter values of each model in the result.
by default, only data from the Model and Result records is included.
@param index: filters the results list by scan, domain, emit, and/or region index.
for example, to get only the final results per model, specify `scan=-1`.
@return: (numpy.ndarray, numpy.ndarray) structured arrays of the values and deltas.
"""
results_query = session.query(orm.Result).join(orm.Model).join(orm.Job)
if jobs:
results_query = filter_objects(results_query, orm.Job, jobs)
if models:
results_query = filter_objects(results_query, orm.Model, models)
for k, v in index.items():
results_query = results_query.filter(calc_id_props[k] == v)
if query_hook is not None:
results_query = query_hook(results_query, **(hook_data or {}))
if order is not None:
results_query = results_query.order_by(*order)
if limit:
results = results_query[0:limit]
else:
results = results_query.all()
n_results = len(results)
logger.debug(f"query_model_results_array: {results_query.statement} ({n_results} rows)")
dt_names = [n for n in util.DB_SPECIAL_PARAMS.values()]
if include_params:
model_ids = {r.model_id for r in results}
pn_query = session.query(orm.Param.key).join(orm.ParamValue)
pn_query = pn_query.filter(orm.ParamValue.model_id.in_(model_ids))
pn_query = pn_query.distinct()
pn_query = pn_query.order_by(orm.Param.key)
p_names = [r.key for r in pn_query.all()]
dt_names.extend(p_names)
logger.debug(f"query_model_results_array: {pn_query.statement} ({len(p_names)} rows)")
dt = []
v0 = []
for n in dt_names:
ft = util.field_to_numpy_type(n)
dt.append((n, ft))
v0.append(np.nan if ft[0] == 'f' else 0)
dt = np.dtype(dt)
v0 = np.array([tuple(v0)], dtype=dt)
values_array = np.full((n_results,), v0, dtype=dt)
deltas_array = np.full((n_results,), v0, dtype=dt)
for i, r in enumerate(results):
d = {**r.as_dict(), **r.model.as_dict()}
for k, v in d.items():
try:
values_array[i][k] = v
except TypeError:
values_array[i][k] = 0
deltas_array[i] = values_array[i]
if include_params:
for k, v in r.model.values.items():
values_array[i][k] = v
for k, v in r.model.deltas.items():
deltas_array[i][k] = v
return values_array, deltas_array
def query_best_models_per_job(session, projects=None, jobs=None, task_level='model', order=None, limit=None):
"""
return the best model (by rfac) of each selected job
the query gathers the R-factors of the selected jobs at the selected task levels
and, for each job, returns the (database) model id where the lowest R-factor is reported
among the gathered results.
this can be useful if you want to compile a report of the best model per job.
@param session:
@param projects: filter by project.
the argument can be a singleton or sequence of orm.Project objects or numeric id.
@param jobs: filter by job.
the argument can be a singleton or sequence of orm.Job objects or numeric id.
@param task_level: element of or index into @ref pmsco.dispatch.CALC_LEVELS.
deepest task_level to include in the query.
results on deeper levels are not considered.
e.g. if you pass 'scan', R-factors of individual scans are included in the query.
note that including deeper levels will not increase the number of results returned.
the lowest level that can be specified is `emit`.
@param order: ordering of results. this can be a sequence of orm.Result attributes.
the default order is by `orm.Result.rfac`.
@param limit: maximum number of models to return
@return sequence of (orm.Model, orm.Result) tuples.
the number of results corresponds to the number of jobs in the filter scope.
to find out details of the models, execute another query that filters on these model ids.
the method produces an SQL query similar to:
@code{.sql}
select Models.id from Models
join Results on Models.id = Results.model_id
join Jobs on Models.job_id = Jobs.id
where scan=-1
and project_id=1
and job_id in (1,2,3)
group by Models.job_id
having min(rfac)
order by rfac
@endcode
"""
try:
level = dispatch.CALC_LEVELS.index(task_level) + 1
except ValueError:
level = task_level + 1
try:
level_name = dispatch.CALC_LEVELS[level]
except IndexError:
level_name = dispatch.CALC_LEVELS[4]
query = session.query(orm.Model, orm.Result).join(orm.Result)
if projects:
query = filter_objects(query, orm.Project, projects)
if jobs:
query = filter_objects(query, orm.Job, jobs)
query = query.filter(getattr(orm.Result, level_name) == -1)
query = query.group_by(orm.Model.job_id)
query = query.having(func.min(orm.Result.rfac))
if order is not None:
query = query.order_by(*order)
else:
query = query.order_by(orm.Result.rfac)
if limit:
query = query[0:limit]
else:
query = query.all()
return query
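An illustrative report loop based on this query (the session and the job ids are placeholders):
~~~~~~{.py}
# sketch: best (lowest R-factor) model of each of the jobs 1, 2 and 3
rows = query_best_models_per_job(session, jobs=[1, 2, 3], task_level='scan')
for model_obj, result_obj in rows:
    print(model_obj.job_id, model_obj.model, result_obj.rfac)
~~~~~~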
def filter_objects(query, entity, objects):
"""
filter a query for the given objects
apply a simple object filter to a database query.
the criteria can be a single object or a sequence of objects.
the objects can be specified either by their object representation or numeric id.
the query is filtered by id.
thus, in the first case, the objects must have a valid id.
@param query: sqlalchemy.orm.Query object that queries a table that is linked to the entity table.
the function joins the entity table.
a table with a direct foreign key relationship to the entity table must already be in the query.
@param entity: orm entity class, e.g. pmsco.database.orm.Project.
@param objects: singleton or sequence of orm objects or their numeric ids.
@return: modified query
"""
# avoid duplicate joins
if str(query.statement).find(entity.__tablename__) < 0:
query = query.join(entity)
try:
objects = [p if isinstance(p, int) else p.id for p in objects]
query = query.filter(entity.id.in_(objects))
except TypeError:
object = objects if isinstance(objects, int) else objects.id
query = query.filter(entity.id == object)
return query
def filter_task_levels(query, level='model', include_parents=False):
"""
refine a query by filtering by task level.
@param query: sqlalchemy.orm.Query object that queries the Result table
(possibly joined with others).
@param level: element of or index into @ref pmsco.dispatch.CALC_LEVELS.
deepest task_level to include in the query.
results on deeper levels are not considered.
e.g. if you pass 'scan', R-factors of individual scans are included in the query.
the lowest level that can be specified is `emit`.
@param include_parents: by default, the query will return only results from the given level.
if True, combined results (parents) will be returned as well.
"""
try:
level = dispatch.CALC_LEVELS.index(level)
except ValueError:
level = int(level)
child_level = level + 1
try:
child_level_name = dispatch.CALC_LEVELS[child_level]
level_name = dispatch.CALC_LEVELS[level]
except IndexError:
child_level_name = dispatch.CALC_LEVELS[4]
level_name = dispatch.CALC_LEVELS[3]
query = query.filter(getattr(orm.Result, child_level_name) == -1)
if not include_parents:
query = query.filter(getattr(orm.Result, level_name) >= 0)
return query
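To illustrate the filter options of query_model_results_array (a sketch; the open session, the job id and the hook function are assumptions, not part of the module):
~~~~~~{.py}
import pmsco.database.orm as orm
import pmsco.database.query as db_query

def rfac_hook(query, rfac_max=1.0):
    # example hook: keep only results below a given R-factor
    return query.filter(orm.Result.rfac <= rfac_max)

values, deltas = db_query.query_model_results_array(
    session, jobs=1,                          # job id 1 is a placeholder
    scan=-1, domain=-1, emit=-1, region=-1,   # only the combined result of each model
    query_hook=rfac_hook, hook_data={'rfac_max': 0.6},
    order=(orm.Result.rfac,), limit=20, include_params=True)
print(values['_rfac'][:5])
~~~~~~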

161
pmsco/database/util.py Normal file
View File

@@ -0,0 +1,161 @@
import logging
import numpy as np
from pathlib import Path
import pmsco.dispatch as dispatch
logger = logging.getLogger(__name__)
## mapping of database fields to special parameter names
#
# `_db` parameters are returned by some query methods to identify the database records.
#
DB_SPECIAL_PARAMS = {"project_id": "_db_project_id",
"job_id": "_db_job_id",
"model_id": "_db_model_id",
"result_id": "_db_result_id",
"model": "_model",
"scan": "_scan",
"domain": "_domain",
"emit": "_emit",
"region": "_region",
"gen": "_gen",
"particle": "_particle",
"rfac": "_rfac",
"secs": "_secs",
"timestamp": "_timestamp"}
## numpy data types of special parameters by database field
#
# this dictionary helps to create a numpy array from a database record.
#
DB_SPECIAL_NUMPY_TYPES = {"_db_project_id": "i8",
"_db_job_id": "i8",
"_db_model_id": "i8",
"_db_result_id": "i8",
"_model": "i8",
"_scan": "i8",
"_domain": "i8",
"_emit": "i8",
"_region": "i8",
"_gen": "i8",
"_particle": "i8",
"_rfac": "f8",
"_secs": "f8",
"_timestamp": "f8"}
def regular_params(d):
"""
filter regular parameters from dictionary
returns a dictionary containing only the regular parameters (those not prefixed with an underscore).
@param d: dict or numpy.void or pmsco.dispatch.CalcID.
the param names must have no leading underscore.
the numpy.void type occurs when an element of a structured array is extracted.
the CalcID does not contain a regular parameter and will return an empty dictionary.
it is supported only for compatibility with special_params function.
a tuple or list is interpreted as a sequence of parameter names.
in this case, only the names without a leading underscore are returned.
@return: dict for mapping types (numpy.void and dict) containing the regular key: value pairs of the original object.
list (or tuple, matching the input type) of regular parameter names for sequence input.
"""
if isinstance(d, np.void):
d = {k: d[k] for k in d.dtype.names if k[0] != "_"}
elif isinstance(d, dispatch.CalcID):
d = {}
elif isinstance(d, tuple):
d = [k for k in d if k[0] != "_"]
d = tuple(d)
elif isinstance(d, dict):
d = {k: v for k, v in d.items() if k[0] != "_"}
else:
d = [k for k in d if k[0] != "_"]
return d
def special_params(d):
"""
filter special parameters from model dictionary, numpy record or sequence.
special parameters are those prefixed with an underscore.
the underscore is removed from the keys.
fields starting with '_db_' are removed.
@param d: dict or numpy.void or pmsco.dispatch.CalcID or sequence.
in the case of a dict or numpy.void,
the key names of the special parameters must have a leading underscore.
the numpy.void type occurs when an element of a structured array is extracted.
in the case of a CalcID, the attribute names become the key names.
a tuple or list is interpreted as a sequence of parameter names.
in this case the names representing special parameters are returned with underscore removed.
@return
the return type depends on the type of input `d`:
@arg in the case of a dict, numpy.void or CalcID it is a dictionary.
@arg in the case of a tuple or list the return type is the same as the input.
"""
if isinstance(d, np.void):
d = {k[1:]: d[k] for k in d.dtype.names if k[0] == "_" and k[0:4] != "_db_"}
elif isinstance(d, dispatch.CalcID):
d = d._asdict()
elif isinstance(d, tuple):
d = [k[1:] for k in d if k[0] == "_" and k[0:4] != "_db_"]
d = tuple(d)
elif isinstance(d, dict):
d = {k[1:]: v for k, v in d.items() if k[0] == "_" and k[0:4] != "_db_"}
else:
d = [k[1:] for k in d if k[0] == "_" and k[0:4] != "_db_"]
return d
def field_to_param(f):
"""
translate database field name to parameter name.
field names of optimization parameters are unchanged.
special parameters are prefixed by '_' or '_db_'.
@param f: (str) database field name.
@return: (str) parameter name as used in model dictionaries.
"""
try:
p = DB_SPECIAL_PARAMS[f]
except KeyError:
p = f
return p
def field_to_numpy_type(f):
"""
determine the numpy data type string of a database field.
@param f: (str) database field name.
@return: (str) numpy type description, e.g. 'f8'.
"""
try:
t = DB_SPECIAL_NUMPY_TYPES[f]
except KeyError:
t = 'f8'
return t
def is_sqlite3_file(path_like):
"""
test whether a file is an sqlite3 database file.
@param path_like: file path (str or pathlib.Path).
@return: (bool)
"""
try:
with Path(path_like).open("rb") as f:
s = f.read(16)
return s == b"SQLite format 3\000"
except OSError:
return False
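A short, illustrative round trip through these helpers (the values are made up):
~~~~~~{.py}
import pmsco.database.util as util

row = {'dA': 25.6, '_rfac': 0.42, '_model': 10, '_db_model_id': 7}
print(util.regular_params(row))            # {'dA': 25.6}
print(util.special_params(row))            # {'rfac': 0.42, 'model': 10}, '_db_' fields dropped
print(util.field_to_param('rfac'))         # '_rfac'
print(util.field_to_numpy_type('_rfac'))   # 'f8'
~~~~~~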

View File

@@ -4,16 +4,13 @@ calculation dispatcher.
@author Matthias Muntwiler
@copyright (c) 2015 by Paul Scherrer Institut @n
@copyright (c) 2015-21 by Paul Scherrer Institut @n
Licensed under the Apache License, Version 2.0 (the "License"); @n
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import os.path
import datetime
@@ -21,8 +18,18 @@ import signal
import collections
import copy
import logging
from attrdict import AttrDict
try:
from mpi4py import MPI
mpi_comm = MPI.COMM_WORLD
mpi_size = mpi_comm.Get_size()
mpi_rank = mpi_comm.Get_rank()
except ImportError:
MPI = None
mpi_comm = None
mpi_size = 1
mpi_rank = 0
from pmsco.helpers import BraceMessage as BMsg
logger = logging.getLogger(__name__)
@@ -53,7 +60,7 @@ TAG_ERROR_ABORTING = 4
## levels of calculation tasks
#
CALC_LEVELS = ('model', 'scan', 'sym', 'emit', 'region')
CALC_LEVELS = ('model', 'scan', 'domain', 'emit', 'region')
## intermediate sub-class of CalcID
#
@@ -159,13 +166,13 @@ class CalculationTask(object):
@arg @c id.model structure number or iteration (handled by the mode module)
@arg @c id.scan scan number (handled by the project)
@arg @c id.sym symmetry number (handled by the project)
@arg @c id.domain domain number (handled by the project)
@arg @c id.emit emitter number (handled by the project)
@arg @c id.region region number (handled by the region handler)
specified members must be greater or equal to zero.
-1 is the wildcard which is used in parent tasks,
where, e.g., no specific symmetry is chosen.
where, e.g., no specific domain is chosen.
the root task has the ID (-1, -1, -1, -1, -1).
"""
@@ -181,6 +188,15 @@ class CalculationTask(object):
# this is typically initialized to the parameters of the parent task,
# and varied at the level where the task ID was produced.
## @var delta (dict)
# dictionary containing a delta vector of the model parameters.
#
# this is a diagnostic value of the optimizer, it is not used by calculators.
# if defined, it is entered into the results database (ParamValue.delta field).
#
# the exact meaning depends on the optimizer.
# in particle swarm, e.g., it is the current velocity of the particle.
## @var file_root (string)
# file name without extension and index.
@@ -249,6 +265,7 @@ class CalculationTask(object):
self.id = CalcID(-1, -1, -1, -1, -1)
self.parent_id = self.id
self.model = {}
self.delta = {}
self.file_root = ""
self.file_ext = ""
self.result_filename = ""
@@ -311,7 +328,8 @@ class CalculationTask(object):
format input or output file name including calculation index.
@param overrides optional keyword arguments override object fields.
the following keywords are handled: @c root, @c model, @c scan, @c sym, @c emit, @c region, @c ext.
the following keywords are handled:
`root`, `model`, `scan`, `domain`, `emit`, `region`, `ext`.
@return a string consisting of the concatenation of the base name, the ID, and the extension.
"""
@@ -322,7 +340,7 @@ class CalculationTask(object):
for key in overrides.keys():
parts[key] = overrides[key]
filename = "{root}_{model}_{scan}_{sym}_{emit}_{region}{ext}".format(**parts)
filename = "{root}_{model}_{scan}_{domain}_{emit}_{region}{ext}".format(**parts)
return filename
def copy(self):
@@ -462,7 +480,7 @@ class CachedCalculationMethod(object):
def wrapped_func(inst, model, index):
# note: _replace returns a new instance of the namedtuple
index = index._replace(emit=-1, region=-1)
cache_index = (id(inst), index.model, index.scan, index.sym)
cache_index = (id(inst), index.model, index.scan, index.domain)
try:
result = self._cache[cache_index]
except KeyError:
@@ -490,6 +508,16 @@ class CachedCalculationMethod(object):
del self._cache[index]
class AttrDict(collections.UserDict):
def __getattr__(self, key):
return self.__getitem__(key)
def __setattr__(self, key, value):
if key == "data":
return super().__setattr__(key, value)
return self.__setitem__(key, value)
class MscoProcess(object):
"""
code shared by MscoMaster and MscoSlave.
@@ -518,8 +546,7 @@ class MscoProcess(object):
#
# the default is 2 days after start.
def __init__(self, comm):
self._comm = comm
def __init__(self):
self._project = None
self._atomic_scattering = None
self._multiple_scattering = None
@@ -565,6 +592,8 @@ class MscoProcess(object):
"""
clean up after all calculations.
this method must be called after run() has finished.
@return: None
"""
pass
@@ -693,7 +722,7 @@ class MscoProcess(object):
parameters generation is delegated to the project's create_params method.
@param task: CalculationTask with all attributes set for the calculation.
@return: pmsco.project.Params object for the calculator.
@return: pmsco.project.CalculatorParams object for the calculator.
"""
par = self._project.create_params(task.model, task.id)
@@ -711,7 +740,7 @@ class MscoProcess(object):
@param task: CalculationTask with all attributes set for the calculation.
@param par: pmsco.project.Params object for the calculator.
@param par: pmsco.project.CalculatorParams object for the calculator.
its phase_files attribute is updated with the created scattering files.
the radial matrix elements are not changed (but may be in a future version).
@@ -740,7 +769,7 @@ class MscoProcess(object):
calculate the multiple scattering intensity.
@param task: CalculationTask with all attributes set for the calculation.
@param par: pmsco.project.Params object for the calculator.
@param par: pmsco.project.CalculatorParams object for the calculator.
@param clu: pmsco.cluster.Cluster object for the calculator.
@return: None
"""
@@ -820,16 +849,16 @@ class MscoMaster(MscoProcess):
## @var task_handlers
# (AttrDict) dictionary of task handler objects
#
# the keys are the task levels 'model', 'scan', 'sym', 'emit' and 'region'.
# the keys are the task levels 'model', 'scan', 'domain', 'emit' and 'region'.
# the values are handlers.TaskHandler objects.
# the objects can be accessed in attribute or dictionary notation.
def __init__(self, comm):
super(MscoMaster, self).__init__(comm)
def __init__(self):
super().__init__()
self._pending_tasks = collections.OrderedDict()
self._running_tasks = collections.OrderedDict()
self._complete_tasks = collections.OrderedDict()
self._slaves = self._comm.Get_size() - 1
self._slaves = mpi_size - 1
self._idle_ranks = []
self.max_calculations = 1000000
self._calculations = 0
@@ -854,12 +883,18 @@ class MscoMaster(MscoProcess):
the method notifies the handlers of the number of available slave processes (slots).
some of the tasks handlers adjust their branching according to the number of slots.
this mechanism may be used to balance the load between the task levels.
however, the current implementation is very coarse in this respect.
it advertises all slots to the model handler but a reduced number to the remaining handlers
depending on the operation mode.
the region handler receives a maximum of 4 slots except in single calculation mode.
in single calculation mode, all slots can be used by all handlers.
this mechanism may be used to adjust the priorities of the task levels,
i.e., whether one slot handles all calculations of one model
so that all models of a generation finish around the same time,
or whether a model is finished completely before the next one is calculated
so that a result is returned as soon as possible.
the current algorithm tries to pass as many slots as available
down to the lowest level (region) in order to minimize wall time.
the lowest level is restricted to the minimum number of splits
only if the intermediate levels create a lot of branches,
in which case splitting scans would not offer a performance benefit.
"""
super(MscoMaster, self).setup(project)
@@ -868,8 +903,8 @@ class MscoMaster(MscoProcess):
self._idle_ranks = list(range(1, self._running_slaves + 1))
self._root_task = CalculationTask()
self._root_task.file_root = project.output_file
self._root_task.model = project.create_domain().start
self._root_task.file_root = str(project.output_file)
self._root_task.model = project.model_space.start
for level in self.task_levels:
self.task_handlers[level] = project.handler_classes[level]()
@@ -877,14 +912,22 @@ class MscoMaster(MscoProcess):
self.task_handlers.model.datetime_limit = self.datetime_limit
slaves_adj = max(self._slaves, 1)
self.task_handlers.model.setup(project, slaves_adj)
if project.mode != "single":
slaves_adj = max(slaves_adj / 2, 1)
self.task_handlers.scan.setup(project, slaves_adj)
self.task_handlers.sym.setup(project, slaves_adj)
self.task_handlers.emit.setup(project, slaves_adj)
if project.mode != "single":
n_models = self.task_handlers.model.setup(project, slaves_adj)
if n_models > 1:
slaves_adj = max(int(slaves_adj / 2), 1)
n_scans = self.task_handlers.scan.setup(project, slaves_adj)
if n_scans > 1:
slaves_adj = max(int(slaves_adj / 2), 1)
n_doms = self.task_handlers.domain.setup(project, slaves_adj)
if n_doms > 1:
slaves_adj = max(int(slaves_adj / 2), 1)
n_emits = self.task_handlers.emit.setup(project, slaves_adj)
if n_emits > 1:
slaves_adj = max(int(slaves_adj / 2), 1)
n_extra = max(n_scans, n_doms, n_emits)
if n_extra > slaves_adj * 2:
slaves_adj = min(slaves_adj, 4)
logger.debug(BMsg("{regions} slots available for region handler", regions=slaves_adj))
self.task_handlers.region.setup(project, slaves_adj)
project.setup(self.task_handlers)
@@ -911,6 +954,7 @@ class MscoMaster(MscoProcess):
else:
self._dispatch_tasks()
self._receive_result()
self._cleanup_tasks()
self._check_finish()
logger.debug("master exiting main loop")
@@ -918,12 +962,32 @@ class MscoMaster(MscoProcess):
self._save_report()
def cleanup(self):
"""
clean up after all calculations.
this method must be called after run() has finished.
in the master process, this calls cleanup() of each task handler and of the project.
@return: None
"""
logger.debug("master entering cleanup")
for level in reversed(self.task_levels):
self.task_handlers[level].cleanup()
self._project.cleanup()
super(MscoMaster, self).cleanup()
def _cleanup_tasks(self):
"""
periodic clean-up in the main loop.
once per iteration of the main loop, this method cleans up unnecessary files.
this is done by the project's cleanup_files() method.
@return: None
"""
self._project.cleanup_files()
def _dispatch_results(self):
"""
pass results through the post-processing modules.
@@ -993,7 +1057,7 @@ class MscoMaster(MscoProcess):
else:
logger.debug("assigning task %s to rank %u", str(task.id), rank)
self._running_tasks[task.id] = task
self._comm.send(task.get_mpi_message(), dest=rank, tag=TAG_NEW_TASK)
mpi_comm.send(task.get_mpi_message(), dest=rank, tag=TAG_NEW_TASK)
self._calculations += 1
else:
if not self._finishing:
@@ -1015,7 +1079,7 @@ class MscoMaster(MscoProcess):
while self._idle_ranks:
rank = self._idle_ranks.pop()
logger.debug("send finish tag to rank %u", rank)
self._comm.send(None, dest=rank, tag=TAG_FINISH)
mpi_comm.send(None, dest=rank, tag=TAG_FINISH)
self._running_slaves -= 1
def _receive_result(self):
@@ -1025,7 +1089,7 @@ class MscoMaster(MscoProcess):
if self._running_slaves > 0:
logger.debug("waiting for calculation result")
s = MPI.Status()
data = self._comm.recv(source=MPI.ANY_SOURCE, tag=MPI.ANY_TAG, status=s)
data = mpi_comm.recv(source=MPI.ANY_SOURCE, tag=MPI.ANY_TAG, status=s)
if s.tag == TAG_NEW_RESULT:
task_id = self._accept_task_done(data)
@@ -1122,9 +1186,9 @@ class MscoMaster(MscoProcess):
scan_tasks = self.task_handlers.scan.create_tasks(task)
for scan_task in scan_tasks:
sym_tasks = self.task_handlers.sym.create_tasks(scan_task)
for sym_task in sym_tasks:
emitter_tasks = self.task_handlers.emit.create_tasks(sym_task)
dom_tasks = self.task_handlers.domain.create_tasks(scan_task)
for dom_task in dom_tasks:
emitter_tasks = self.task_handlers.emit.create_tasks(dom_task)
for emitter_task in emitter_tasks:
region_tasks = self.task_handlers.region.create_tasks(emitter_task)
for region_task in region_tasks:
@@ -1145,8 +1209,8 @@ class MscoSlave(MscoProcess):
#
# typically, a task is aborted when an exception is encountered.
def __init__(self, comm):
super(MscoSlave, self).__init__(comm)
def __init__(self):
super().__init__()
self._errors = 0
self._max_errors = 5
@@ -1159,7 +1223,7 @@ class MscoSlave(MscoProcess):
self._running = True
while self._running:
logger.debug("waiting for message")
data = self._comm.recv(source=0, tag=MPI.ANY_TAG, status=s)
data = mpi_comm.recv(source=0, tag=MPI.ANY_TAG, status=s)
if s.tag == TAG_NEW_TASK:
logger.debug("received new task")
self.accept_task(data)
@@ -1189,17 +1253,17 @@ class MscoSlave(MscoProcess):
logger.exception(BMsg("unhandled exception in calculation task {0}", task.id))
self._errors += 1
if self._errors <= self._max_errors:
self._comm.send(data, dest=0, tag=TAG_INVALID_RESULT)
mpi_comm.send(data, dest=0, tag=TAG_INVALID_RESULT)
else:
logger.error("too many exceptions, aborting")
self._running = False
self._comm.send(data, dest=0, tag=TAG_ERROR_ABORTING)
mpi_comm.send(data, dest=0, tag=TAG_ERROR_ABORTING)
else:
logger.debug(BMsg("sending result of task {0} to master", result.id))
self._comm.send(result.get_mpi_message(), dest=0, tag=TAG_NEW_RESULT)
mpi_comm.send(result.get_mpi_message(), dest=0, tag=TAG_NEW_RESULT)
def run_master(mpi_comm, project):
def run_master(project):
"""
initialize and run the master calculation loop.
@@ -1211,25 +1275,25 @@ def run_master(mpi_comm, project):
if an unhandled exception occurs, this function aborts the MPI communicator, killing all MPI processes.
the caller will not have a chance to handle the exception.
@param mpi_comm: MPI communicator (mpi4py.MPI.COMM_WORLD).
@param project: project instance (sub-class of project.Project).
"""
try:
master = MscoMaster(mpi_comm)
master = MscoMaster()
master.setup(project)
master.run()
master.cleanup()
except (SystemExit, KeyboardInterrupt):
if mpi_comm:
mpi_comm.Abort()
raise
except Exception:
logger.exception("unhandled exception in master calculation loop.")
if mpi_comm:
mpi_comm.Abort()
raise
def run_slave(mpi_comm, project):
def run_slave(project):
"""
initialize and run the slave calculation loop.
@@ -1242,12 +1306,10 @@ def run_slave(mpi_comm, project):
unless it is a SystemExit or KeyboardInterrupt (where we expect that the master also receives the signal),
the MPI communicator is aborted, killing all MPI processes.
@param mpi_comm: MPI communicator (mpi4py.MPI.COMM_WORLD).
@param project: project instance (sub-class of project.Project).
"""
try:
slave = MscoSlave(mpi_comm)
slave = MscoSlave()
slave.setup(project)
slave.run()
slave.cleanup()
@@ -1255,6 +1317,7 @@ def run_slave(mpi_comm, project):
raise
except Exception:
logger.exception("unhandled exception in slave calculation loop.")
if mpi_comm:
mpi_comm.Abort()
raise
@@ -1267,12 +1330,9 @@ def run_calculations(project):
@param project: project instance (sub-class of project.Project).
"""
mpi_comm = MPI.COMM_WORLD
mpi_rank = mpi_comm.Get_rank()
if mpi_rank == 0:
logger.debug("MPI rank %u setting up master loop", mpi_rank)
run_master(mpi_comm, project)
run_master(project)
else:
logger.debug("MPI rank %u setting up slave loop", mpi_rank)
run_slave(mpi_comm, project)
run_slave(project)

View File

@@ -1,2 +0,0 @@
edac_all_wrap.*
edac.py

View File

@@ -1 +0,0 @@
__author__ = 'muntwiler_m'

View File

@@ -1,7 +0,0 @@
/* EDAC interface for other programs */
%module edac
%{
extern int run_script(char *scriptfile);
%}
extern int run_script(char *scriptfile);

View File

@@ -1,47 +0,0 @@
SHELL=/bin/sh
# makefile for EDAC program and module
#
# the EDAC source code is not included in the public distribution.
# please obtain it from the original author,
# copy it to this directory,
# and apply the edac_all.patch patch before compilation.
#
# see the top-level makefile for additional information.
.SUFFIXES:
.SUFFIXES: .c .cpp .cxx .exe .f .h .i .o .py .pyf .so
.PHONY: all clean edac
FC?=gfortran
FCCOPTS?=
F2PY?=f2py
F2PYOPTS?=
CXX?=g++
CXXOPTS?=-Wno-write-strings
PYTHON?=python
PYTHONOPTS?=
all: edac
edac: edac.exe _edac.so edac.py
edac.exe: edac_all.cpp
$(CXX) $(CXXOPTS) -o edac.exe edac_all.cpp
edac.py _edac.so: edac_all.cpp edac_all.i setup.py
$(PYTHON) $(PYTHONOPTS) setup.py build_ext --inplace
revision.py: _edac.so
git log --pretty=format:"code_rev = 'Code revision %h, %ad'" --date=iso -1 > $@ || echo "code_rev = 'Code revision unknown, "`date +"%F %T %z"`"'" > $@
echo "" >> revision.py
revision.txt: _edac.so edac.exe
git log --pretty=format:"Code revision %h, %ad" --date=iso -1 > $@ || echo "Code revision unknown, "`date +"%F %T %z"` > $@
echo "" >> revision.txt
clean:
rm -f *.so *.o *.exe *.pyc
rm -f edac.py edac_all_wrap.*
rm -f revision.*

View File

@@ -1,23 +0,0 @@
#!/usr/bin/env python
"""
setup.py file for EDAC
"""
from distutils.core import setup, Extension
edac_module = Extension('_edac',
sources=['edac_all.cpp', 'edac_all.i'],
swig_opts=['-c++']
)
setup (name = 'edac',
version = '0.1',
author = "Matthias Muntwiler",
description = """EDAC module in Python""",
ext_modules = [edac_module],
py_modules = ["edac"],
requires=['numpy']
)

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,225 @@
"""
@package pmsco.elements.bindingenergy
Electron binding energies of the elements
Extends the element table of the `periodictable` package
(https://periodictable.readthedocs.io/en/latest/index.html)
by the electron binding energies.
The binding energies are compiled from Gwyn Williams' web page
(https://userweb.jlab.org/~gwyn/ebindene.html).
Please refer to the original web page or the x-ray data booklet
for original sources, definitions and remarks.
Binding energies of gases are replaced by respective values of a common compound
from the 'handbook of x-ray photoelectron spectroscopy' (Physical Electronics, Inc., 1995).
Usage
-----
This module requires the periodictable package (https://pypi.python.org/pypi/periodictable).
~~~~~~{.py}
import periodictable as pt
import pmsco.elements.bindingenergy
# read any periodictable's element interfaces, e.g.
print(pt.gold.binding_energy['4f7/2'])
print(pt.elements.symbol('Au').binding_energy['4f7/2'])
print(pt.elements.name('gold').binding_energy['4f7/2'])
print(pt.elements[79].binding_energy['4f7/2'])
~~~~~~
The database is loaded from the accompanying bindingenergy.json file on first demand.
Attributes are writable, you may update the values in your run-time instance of the database.
Normally, the user will not need to call any functions in this module directly.
The query_binding_energy() function queries all terms with a particular binding energy.
@author Matthias Muntwiler
@copyright (c) 2020-23 by Paul Scherrer Institut @n
Licensed under the Apache License, Version 2.0 (the "License"); @n
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
"""
import json
import numpy as np
from pathlib import Path
import periodictable as pt
import periodictable.core
index_energy = np.zeros(0)
index_number = np.zeros(0)
index_term = []
default_data_path = Path(Path(__file__).parent, "bindingenergy.json")
def load_data(data_path=None):
"""
load binding energy data from json file
the data file must be in the same format as generated by save_data.
@param data_path file path of the data file. default: "bindingenergy.json" next to this module file
@return dictionary
"""
if data_path is None:
data_path = default_data_path
with open(data_path, "rt", encoding="utf8") as fp:
data = json.load(fp)
return data
def save_data(data_path=None):
"""
save binding energy data to json file
@param data_path file path of the data file. default: "bindingenergy.json" next to this module file
@return None
"""
if data_path is None:
data_path = default_data_path
data = {}
for element in pt.elements:
element_data = {}
for term, energy in element.binding_energy.items():
element_data[term] = energy
if element_data:
data[element.number] = element_data
with open(data_path, "w", encoding="utf8") as fp:
json.dump(data, fp, sort_keys=True, indent='\t')
def init(table, reload=False):
"""
load the binding energy data into the periodic table.
this function is called by periodictable to load the data on demand.
@param table: periodictable.core.PeriodicTable instance to be extended.
@param reload: (bool) reload the data even if the 'binding_energy' property is already registered.
@return: None
"""
if 'binding_energy' in table.properties and not reload:
return
table.properties.append('binding_energy')
pt.core.Element.binding_energy = {}
pt.core.Element.binding_energy_units = "eV"
data = load_data()
for el_key, el_data in data.items():
try:
el = table[int(el_key)]
except ValueError:
el = table.symbol(el_key)
el.binding_energy = el_data
def build_index():
"""
build an index for query_binding_energy().
the index is kept in global variables of the module.
@return None
"""
global index_energy
global index_number
global index_term
n = 0
for element in pt.elements:
n += len(element.binding_energy)
index_energy = np.zeros(n)
index_number = np.zeros(n)
index_term = []
for element in pt.elements:
for term, energy in element.binding_energy.items():
index_term.append(term)
i = len(index_term) - 1
index_energy[i] = energy
index_number[i] = element.number
def query_binding_energy(energy, tol=1.0):
"""
search the periodic table for a specific binding energy and return all matching terms.
@param energy: binding energy in eV.
@param tol: tolerance in eV.
@return: list of dictionaries containing element and term specification.
the list is ordered arbitrarily.
each dictionary contains the following keys:
@arg 'number': element number
@arg 'symbol': element symbol
@arg 'term': spectroscopic term
@arg 'energy': actual binding energy
"""
if len(index_energy) == 0:
build_index()
sel = np.abs(index_energy - energy) < tol
idx = np.where(sel)
result = []
for i in idx[0]:
el_num = int(index_number[i])
d = {'number': el_num,
'symbol': pt.elements[el_num].symbol,
'term': index_term[i],
'energy': index_energy[i]}
result.append(d)
return result
def export_flat_text(f):
"""
export the binding energies to a flat general text file.
the file has four space-separated columns `number`, `symbol`, `term`, `energy`.
column names are included in the first row.
@param f: file path or open file object
@return: None
"""
if hasattr(f, "write") and callable(f.write):
f.write("number symbol term energy\n")
for element in pt.elements:
for term, energy in element.binding_energy.items():
f.write(f"{element.number} {element.symbol} {term} {energy}\n")
else:
with open(f, "w") as fi:
export_flat_text(fi)
def import_flat_text(f):
"""
import binding energies from a flat general text file.
data is in space-separated columns.
the first row contains column names.
at least the columns `number`, `term`, `energy` must be present.
the function updates existing entries and appends entries of non-existing terms.
existing terms that are not listed in the file remain unchanged.
@param f: file path or open file object
@return: None
"""
data = np.atleast_1d(np.genfromtxt(f, names=True, dtype=None, encoding="utf8"))
for d in data:
pt.elements[d['number']].binding_energy[d['term']] = d['energy']
def _load_binding_energy():
"""
delayed loading of the binding energy table.
"""
init(periodictable.core.default_table())
periodictable.core.delayed_load(['binding_energy'], _load_binding_energy)
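As a usage illustration of the query function mentioned in the docstring, here is a minimal sketch; the 285 eV value and the 2 eV tolerance are made up for illustration, and the pmsco package with its bindingenergy.json data file must be installed.
~~~~~~{.py}
import pmsco.elements.bindingenergy as be

# list all core levels within 2 eV of a measured binding energy of 285 eV
for match in be.query_binding_energy(285.0, tol=2.0):
    print(f"{match['symbol']} {match['term']}: {match['energy']} eV")

# dump the whole table to a plain text file (columns: number, symbol, term, energy)
be.export_flat_text("bindingenergy.txt")
~~~~~~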

Binary file not shown.

View File

@@ -0,0 +1,446 @@
"""
@package pmsco.elements.photoionization
Photoionization cross-sections of the elements
Extends the element table of the `periodictable` package
(https://periodictable.readthedocs.io/en/latest/index.html)
by a table of photoionization cross-sections and asymmetry parameters.
The data is available from (https://vuo.elettra.eu/services/elements/)
or (https://figshare.com/articles/dataset/Digitisation_of_Yeh_and_Lindau_Photoionisation_Cross_Section_Tabulated_Data/12389750).
Both sources are based on the original atomic data tables by Yeh and Lindau (1985).
The Elettra data includes the cross section and asymmetry parameter and is interpolated at finer steps,
whereas the Kalha data contains only the cross sections at the photon energies calculated by Yeh and Lindau
plus an additional point at 8 keV.
The tables go up to 1500 eV photon energy and do not resolve spin-orbit splitting.
Usage
-----
This module adds the photoionization attribute to the elements database of the periodictable package (https://pypi.python.org/pypi/periodictable).
Python >= 3.6, numpy >= 1.15 and the periodictable package are required.
~~~~~~{.py}
import numpy as np
import periodictable as pt
import pmsco.elements.photoionization
# get a SubShellPhotoIonization object from any of periodictable's element interfaces:
sspi = pt.gold.photoionization['4f']
sspi = pt.elements.symbol('Au').photoionization['4f']
sspi = pt.elements.name('gold').photoionization['4f']
sspi = pt.elements[79].photoionization['4f']
# get the cross section, asymmetry parameter or differential cross section at 800 eV photon energy:
sspi.cross_section(800)
sspi.asymmetry_parameter(800)
sspi.diff_cross_section(800, gamma=30)
# with the j quantum number, the cross-section is weighted based on a full sub-shell:
sspi = pt.gold.photoionization['4f7/2']
print(sspi.weight)
print(pt.gold.photoionization['4f7/2'].cross_section(800) / pt.gold.photoionization['4f'].cross_section(800))
# the original data is contained in the data array (which is a numpy.recarray):
sspi.data.eph, sspi.data.cs, sspi.data.ap
~~~~~~
The data is loaded on demand from the cross-sections.dat file when the photoionization record is first accessed.
Normally, the user will not need to call any functions in this module directly.
The load_elettra_data()/load_kalha_data() and save_pickled_data() functions are provided
to import data from one of the sources referenced above and
to create the cross-sections.dat file.
@author Matthias Muntwiler
@copyright (c) 2020-23 by Paul Scherrer Institut @n
Licensed under the Apache License, Version 2.0 (the "License"); @n
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
"""
import copy
import numpy as np
from pathlib import Path
import periodictable as pt
import pickle
import urllib.request
import urllib.error
import periodictable.core
class PhotoIonization(dict):
"""
photo-ionization parameters of an element
this class provides the photo-ionization cross-section and asymmetry parameter of the sub-shells of an element.
it is, essentially, a dictionary, mapping 'nl' and 'nlj' terms to the corresponding SubShellPhotoIonization object.
examples of 'nl' and 'nlj' terms: '4f' and '4f7/2'
@note the dictionary actually contains raw data for 'nl' terms only.
for 'nlj' terms, the corresponding 'nl' object is copied,
and a weight according to the spin-orbit multiplicity is set.
@note 'nlj' terms are not considered by any methods or properties
except the bracket notation or __getitem__ method!
in particular, iteration or the keys() method will yield 'nl' terms only.
"""
def __init__(self, *args, **kwargs):
"""
dictionary constructor
the class accepts the same arguments as the Python built-in dict constructor.
keys are 'nl' terms, e.g. '4f', and values must be SubShellPhotoIonization() objects.
@param args:
@param kwargs:
"""
super().__init__(*args, **kwargs)
self.cross_section_units = "Mb"
def __getitem__(self, k):
"""
get sub-shell photo-ionization data by 'nl' or 'nlj' term.
@param k: dictionary key.
if this is an 'nl' term, the original object is returned.
if this is an 'nlj' term, a proxy of the corresponding 'nl' object
with shared data but weight based on j-branching is returned.
@return: SubShellPhotoIonization() object
@note whether the original or a proxy object is returned,
its data attribute always refers to the original data.
any modification will affect the original data (process memory).
"""
spi = super().__getitem__(k[0:2])
if len(k) > 2:
spi = copy.copy(spi)
spi.set_spin_orbit(k[1:5])
return spi
class SubShellPhotoIonization(object):
"""
Sub-shell photo-ionization parameters versus photon energy.
this class provides the photo-ionization cross-section and asymmetry parameter of one sub-shell.
it contains a three-column record array of photon energy, cross section and asymmetry parameter in self.data.
accessory functions provide high-level access to specific views and interpolated data.
a weighting factor self.weight is applied to the method results.
it is normally used to weight the spin-orbit peaks by calling set_spin_orbit().
"""
SPIN_ORBIT_WEIGHTS = {"p1/2": 1. / 3.,
"p3/2": 2. / 3.,
"d3/2": 2. / 5.,
"d5/2": 3. / 5.,
"f5/2": 3. / 7.,
"f7/2": 4. / 7.}
def __init__(self, photon_energy, cross_section, asymmetry_parameter):
"""
initialize a new object instance.
all arrays must have the same length.
@param photon_energy: (array-like) photon energies
@param cross_section: (array-like) cross-section values
@param asymmetry_parameter: (array-like) asymmetry parameter values
"""
super().__init__()
self.data = np.rec.fromarrays([photon_energy, cross_section, asymmetry_parameter], names='eph, cs, ap')
self.weight = 1.
def cross_section(self, photon_energy):
"""
interpolated sub-shell cross-section at a specific energy.
the weighting factor self.weight (e.g. spin-orbit) is included in the result.
@param photon_energy: photon energy in eV.
can be scalar or numpy array.
@return: cross-section in Mb.
numpy.nan where photon_energy is off range.
"""
cs = np.interp(photon_energy, self.data.eph, self.data.cs, left=np.nan, right=np.nan) * self.weight
return cs
def asymmetry_parameter(self, photon_energy):
"""
interpolated asymmetry parameter at a specific energy.
@param photon_energy: photon energy in eV.
can be scalar or numpy array.
@return: asymmetry parameter (0..2).
numpy.nan where photon_energy is off range.
"""
ap = np.interp(photon_energy, self.data.eph, self.data.ap, left=np.nan, right=np.nan)
return ap
def diff_cross_section(self, photon_energy, gamma):
"""
differential cross-section for linear polarization.
the weighting factor self.weight (e.g. spin-orbit) is included in the result.
@param photon_energy: photon energy in eV.
@param gamma: angle between polarization vector and electron propagation direction in degrees.
@return: differential cross-section in Mb.
"""
p2 = (3 * np.cos(np.radians(gamma)) ** 2 - 1) / 2
cs = self.cross_section(photon_energy)
ap = self.asymmetry_parameter(photon_energy)
dcs = cs / 4 / np.pi * (1 + ap * p2)
return dcs
def photon_energy_array(self):
"""
photon energy array.
@return: numpy.ndarray of tabulated photon energies in eV.
"""
return self.data.eph
def cross_section_array(self):
"""
sub-shell cross-section versus photon energy.
the weighting factor self.weight (e.g. spin-orbit) is included in the result.
@return: numpy.ndarray
"""
return self.data.cs * self.weight
def asymmetry_parameter_array(self):
"""
sub-shell asymmetry parameter versus photon energy.
the weighting factor self.weight is not applied to the asymmetry parameter.
@return: numpy.ndarray
"""
return self.data.ap
def diff_cross_section_array(self, gamma):
"""
differential cross-section for linear polarization (full array).
@param gamma: angle between polarization vector and electron propagation direction in degrees.
@return: (np.ndarray) differential cross-section in Mb.
"""
p2 = (3 * np.cos(np.radians(gamma)) ** 2 - 1) / 2
dcs = self.data.cs / 4 / np.pi * (1 + self.data.ap * p2) * self.weight
return dcs
def set_spin_orbit(self, lj):
"""
set the weight according to the spin-orbit quantum number (based on full sub-shell).
the weight is stored in the self.weight attribute.
it is applied to the results of the cross-section methods, but not to the raw data in self.data!
@param lj: (str) 4-character lj term notation, e.g. 'f7/2'
@return: None
"""
self.weight = self.SPIN_ORBIT_WEIGHTS.get(lj, 1.)
def load_kalha_data():
"""
load all cross-sections from csv-files by Kalha et al.
the files must be placed in the 'kalha' directory next to this file.
@return: cross-section data in a nested dictionary, cf. load_pickled_data().
"""
data = {}
p = Path(Path(__file__).parent, "kalha")
for entry in p.glob('*_*.csv'):
if entry.is_file():
try:
element = int(entry.stem.split('_')[0])
except ValueError:
pass
else:
data[element] = load_kalha_file(entry)
return data
def load_kalha_file(path):
"""
load the cross-sections of an element from a csv-file by Kalha et al.
@param path: file path
@return: (dict) dictionary of 'nl' terms.
the data items are tuples (photon_energy, cross_sections) of 1-dimensional numpy arrays.
"""
a = np.genfromtxt(path, delimiter=',', names=True)
b = ~np.isnan(a['Photon_Energy__eV'])
a = a[b]
eph = a['Photon_Energy__eV'].copy()
data = {}
for n in range(1, 8):
for l in 'spdf':
col = f"{n}{l}"
try:
data[col] = SubShellPhotoIonization(eph, a[col].copy(), np.zeros_like(eph))
except ValueError:
pass
return data
def load_kalha_configuration(path):
"""
load the electron configuration from a csv-file by Kalha et al.
@param path: file path
@return: (dict) dictionary of 'nl' terms mapping to number of electrons in the sub-shell.
"""
p = Path(path)
subshells = []
electrons = []
config = {}
with p.open() as f:
for l in f.readlines():
s = l.split(',')
k_eph = "Photon Energy"
k_el = "#electrons"
if s[0][0:len(k_eph)] == k_eph:
subshells = s[1:]
elif s[0][0:len(k_el)] == k_el:
electrons = s[1:]
for i, sh in enumerate(subshells):
if sh:
config[sh] = electrons[i]
return config
def load_elettra_file(symbol, nl):
"""
download the cross sections of one level from the Elettra webelements web site.
@param symbol: (str) element symbol
@param nl: (str) nl term, e.g. '2p' (no spin-orbit)
@return: SubShellPhotoIonization object holding the photon energy, cross section and asymmetry parameter arrays,
or None if the data could not be downloaded or parsed.
"""
spi = None
url = f"https://vuo.elettra.eu/services/elements/data/{symbol.lower()}{nl}.txt"
try:
data = urllib.request.urlopen(url)
except urllib.error.HTTPError:
pass
else:
a = np.genfromtxt(data)
try:
spi = SubShellPhotoIonization(a[:, 0], a[:, 1], a[:, 4])
except IndexError:
pass
return spi
def load_elettra_data():
"""
download the cross sections from the Elettra webelements web site.
@return: cross-section data in a nested dictionary, cf. load_pickled_data().
"""
data = {}
for element in pt.elements:
element_data = {}
for nlj in element.binding_energy:
nl = nlj[0:2]
eb = element.binding_energy[nlj]
if nl not in element_data and eb <= 2000:
spi = load_elettra_file(element.symbol, nl)
if spi is not None:
element_data[nl] = spi
if len(element_data):
data[element.symbol] = element_data
return data
def save_pickled_data(path, data):
"""
save a cross section data dictionary to a python-pickled file.
@param path: file path
@param data: cross-section data in a nested dictionary, cf. load_pickled_data().
@return: None
"""
with open(path, "wb") as f:
pickle.dump(data, f)
def load_pickled_data(path):
"""
load the cross section data from a python-pickled file.
the file can be generated by the save_pickled_data() function.
@param path: file path
@return: cross-section data in a nested dictionary.
the first-level keys are element symbols.
the second-level keys are 'nl' terms (e.g. '2p').
note that the Yeh and Lindau tables do not resolve spin-orbit splitting.
the data items are SubShellPhotoIonization objects
holding the tabulated photon energies, cross sections and asymmetry parameters.
cross section values are given in Mb.
"""
with open(path, "rb") as f:
data = pickle.load(f)
return data
def init(table, reload=False):
"""
loads cross-section data into the periodic table.
this function is called by the periodictable to load the data on demand.
@param table: periodictable.core.PeriodicTable instance to be extended.
@param reload: (bool) reload the data even if the 'photoionization' property is already registered.
@return: None
"""
if 'photoionization' in table.properties and not reload:
return
table.properties.append('photoionization')
# default value
pt.core.Element.photoionization = PhotoIonization()
p = Path(Path(__file__).parent, "cross-sections.dat")
data = load_pickled_data(p)
for el_key, el_data in data.items():
# el_data is dict('nl': PhotoIonizationData)
try:
el = table[int(el_key)]
except ValueError:
el = table.symbol(el_key)
el.photoionization = PhotoIonization(el_data)
def _load_photoionization():
"""
delayed loading of the photoionization table.
"""
init(periodictable.core.default_table())
periodictable.core.delayed_load(['photoionization'], _load_photoionization)
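As a hedged sketch of the data-preparation workflow described in the module docstring (regenerating the cross-sections.dat file from the Elettra service), something along the following lines should work; note that load_elettra_data() downloads one file per element and sub-shell and can take a long time.
~~~~~~{.py}
from pathlib import Path
import pmsco.elements.bindingenergy  # registers the binding_energy property used below
import pmsco.elements.photoionization as pi

# download the interpolated Yeh/Lindau tables from the Elettra web service (slow)
data = pi.load_elettra_data()
# pickle the data next to the module, where init() looks for it
target = Path(pi.__file__).parent / "cross-sections.dat"
pi.save_pickled_data(target, data)
~~~~~~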

209
pmsco/elements/spectrum.py Normal file
View File

@@ -0,0 +1,209 @@
"""
@package pmsco.elements.spectrum
photoelectron spectrum simulator
this module calculates the basic structure of a photoelectron spectrum.
it calculates the positions and approximate amplitudes of elastic peaks
based on photon energy, binding energy, photoionization cross section, and stoichiometry.
escape depth, photon flux and analyser transmission are not accounted for.
usage
-----
this module requires python 3.6, numpy, matplotlib and
the periodictable package (https://pypi.python.org/pypi/periodictable).
~~~~~~{.py}
import numpy as np
import periodictable as pt
import pmsco.elements.spectrum as spec
# for working with the data
labels, energy, intensity = spec.build_spectrum(800., {"Ti": 1, "O": 2})
# for plotting
spec.plot_spectrum(800., {"Ti": 1, "O": 2})
~~~~~~
@author Matthias Muntwiler
@copyright (c) 2020 by Paul Scherrer Institut @n
Licensed under the Apache License, Version 2.0 (the "License"); @n
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
"""
from matplotlib import pyplot as plt
import numpy as np
import periodictable as pt
from . import bindingenergy
from . import photoionization
def get_element(number_or_symbol):
"""
return the given Element object of the periodic table.
@param number_or_symbol: atomic number (int) or chemical symbol (str).
@return: Element object.
"""
try:
el = pt.elements[number_or_symbol]
except KeyError:
el = pt.elements.symbol(number_or_symbol)
return el
def get_binding_energy(photon_energy, element, nlj):
"""
look up the binding energy of a core level and check whether it is smaller than the photon energy.
@param photon_energy: photon energy in eV.
@param element: Element object of the periodic table.
@param nlj: (str) spectroscopic term, e.g. '4f7/2'.
@return: (float) binding energy or numpy.nan.
"""
try:
eb = element.binding_energy[nlj]
except KeyError:
return np.nan
if eb < photon_energy:
return eb
else:
return np.nan
def get_cross_section(photon_energy, element, nlj, gamma=None):
"""
look up the photo-ionization cross-section.
since the Yeh/Lindau tables do not resolve the spin-orbit splitting,
this function applies the normal relative weights of a full sub-shell.
the result is a linear interpolation between tabulated values.
@param photon_energy: photon energy in eV.
@param element: Element object of the periodic table.
@param nlj: (str) spectroscopic term, e.g. '4f7/2'.
the j-value can be left out, in which case the sum over all j-states is returned.
@param gamma: (float) angle in degrees between linear polarization vector and photoelectron emission direction.
By default (None), unpolarized light or magic angle (54.7 deg) geometry is assumed.
@return: (float) total (gamma=None) or differential (gamma not None) cross section in Mb.
"""
if not hasattr(element, "photoionization"):
element = get_element(element)
try:
pi = element.photoionization[nlj]
except KeyError:
return np.nan
if gamma is None:
cs = pi.cross_section(photon_energy)
else:
cs = pi.diff_cross_section(photon_energy, gamma)
return cs
def build_spectrum(photon_energy, elements, binding_energy=False, work_function=4.5, gamma=None):
"""
calculate the positions and amplitudes of core-level photoemission lines.
the function looks up the binding energies and cross sections of all photoemission lines in the energy range
given by the photon energy and returns an array of expected spectral lines.
@param photon_energy: (numeric) photon energy in eV.
@param elements: list or dictionary of elements.
elements are identified by their atomic number (int) or chemical symbol (str).
if a dictionary is given, the (float) values are stoichiometric weights of the elements.
@param binding_energy: (bool) return binding energies (True) rather than kinetic energies (False, default).
@param work_function: (float) work function of the instrument in eV.
@param gamma: (float) angle in degrees between linear polarization vector and photoelectron emission direction.
By default (None), unpolarized light or magic angle (54.7 deg) geometry is assumed.
@return: tuple (labels, positions, intensities) of 1-dimensional numpy arrays representing the spectrum.
labels are in the format {Symbol}{n}{l}{j}.
"""
ekin = []
ebind = []
intens = []
labels = []
for element in elements:
el = get_element(element)
for n in range(1, 8):
for l in "spdf":
for j in ['', '1/2', '3/2', '5/2', '7/2']:
nlj = f"{n}{l}{j}"
eb = get_binding_energy(photon_energy, el, nlj)
cs = get_cross_section(photon_energy, el, nlj, gamma=gamma)
try:
cs = cs * elements[element]
except (KeyError, TypeError):
pass
if not np.isnan(eb) and not np.isnan(cs):
ekin.append(photon_energy - eb - work_function)
ebind.append(eb)
intens.append(cs)
labels.append(f"{el.symbol}{nlj}")
ebind = np.array(ebind)
ekin = np.array(ekin)
intens = np.array(intens)
labels = np.array(labels)
if binding_energy:
return labels, ebind, intens
else:
return labels, ekin, intens
def plot_spectrum(photon_energy, elements, binding_energy=False, work_function=4.5, gamma=None, show_labels=True):
"""
plot a simple spectrum representation of a material.
the function looks up the binding energies and cross sections of all photoemission lines in the energy range
given by the photon energy and returns an array of expected spectral lines.
the spectrum is plotted using matplotlib.pyplot.stem.
@param photon_energy: (numeric) photon energy in eV.
@param elements: list or dictionary of elements.
elements are identified by their atomic number (int) or chemical symbol (str).
if a dictionary is given, the (float) values are stoichiometric weights of the elements.
@param binding_energy: (bool) return binding energies (True) rather than kinetic energies (False, default).
@param work_function: (float) work function of the instrument in eV.
@param gamma: (float) angle in degrees between linear polarization vector and photoelectron emission direction.
By default (None), unpolarized light or magic angle (54.7 deg) geometry is assumed.
@param show_labels: (bool) show peak labels (True, default) or not (False).
@return: (figure, axes)
"""
labels, energy, intensity = build_spectrum(photon_energy, elements, binding_energy=binding_energy,
work_function=work_function, gamma=gamma)
fig, ax = plt.subplots()
ax.stem(energy, intensity, basefmt=' ', use_line_collection=True)
if show_labels:
for sxy in zip(labels, energy, intensity):
ax.annotate(sxy[0], xy=(sxy[1], sxy[2]), textcoords='data')
ax.grid()
if binding_energy:
ax.set_xlabel('binding energy')
else:
ax.set_xlabel('kinetic energy')
ax.set_ylabel('intensity')
ax.set_title(elements)
return fig, ax
def plot_cross_section(el, nlj):
"""
plot the photoionization cross section of one core level on a logarithmic scale.
@param el: Element object, atomic number (int) or chemical symbol (str).
@param nlj: (str) spectroscopic term, e.g. '4f'.
"""
energy = np.arange(100, 1500, 140)
cs = get_cross_section(energy, el, nlj)
fig, ax = plt.subplots()
ax.set_yscale("log")
ax.plot(energy, cs)
return fig, ax
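A short, illustrative usage sketch of the plot_cross_section() helper above; the element and term are chosen arbitrarily.
~~~~~~{.py}
from matplotlib import pyplot as plt
import pmsco.elements.spectrum as spec

# plot the Au 4f cross section on the coarse default energy grid
spec.plot_cross_section("Au", "4f")
plt.show()
~~~~~~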

View File

@@ -27,20 +27,20 @@ logger = logging.getLogger(__name__)
#
# each string of this set marks a category of files.
#
# @arg @c 'input' : raw input files for calculator, including cluster and atomic files in custom format
# @arg @c 'output' : raw output files from calculator
# @arg @c 'atomic' : atomic scattering (phase, emission) files in portable format
# @arg @c 'cluster' : cluster files in portable XYZ format for report
# @arg @c 'log' : log files
# @arg @c 'debug' : debug files
# @arg @c 'model': output files in ETPAI format: complete simulation (a_-1_-1_-1_-1)
# @arg @c 'scan' : output files in ETPAI format: scan (a_b_-1_-1_-1)
# @arg @c 'symmetry' : output files in ETPAI format: symmetry (a_b_c_-1_-1)
# @arg @c 'emitter' : output files in ETPAI format: emitter (a_b_c_d_-1)
# @arg @c 'region' : output files in ETPAI format: region (a_b_c_d_e)
# @arg @c 'report': final report of results
# @arg @c 'population': final state of particle population
# @arg @c 'rfac': files related to models which give bad r-factors (dynamic category, see below).
# @arg 'input' : raw input files for calculator, including cluster and atomic files in custom format
# @arg 'output' : raw output files from calculator
# @arg 'atomic' : atomic scattering (phase, emission) files in portable format
# @arg 'cluster' : cluster files in portable XYZ format for report
# @arg 'log' : log files
# @arg 'debug' : debug files
# @arg 'model': output files in ETPAI format: complete simulation (a_-1_-1_-1_-1)
# @arg 'scan' : output files in ETPAI format: scan (a_b_-1_-1_-1)
# @arg 'domain' : output files in ETPAI format: domain (a_b_c_-1_-1)
# @arg 'emitter' : output files in ETPAI format: emitter (a_b_c_d_-1)
# @arg 'region' : output files in ETPAI format: region (a_b_c_d_e)
# @arg 'report': final report of results
# @arg 'population': final state of particle population
# @arg 'rfac': files related to models which give bad r-factors (dynamic category, see below).
#
# @note @c 'rfac' is a dynamic category not connected to a particular file or content type.
# no file should be marked @c 'rfac'.
@@ -48,7 +48,7 @@ logger = logging.getLogger(__name__)
# if so, all files related to bad models are deleted, regardless of their static category.
#
FILE_CATEGORIES = {'cluster', 'atomic', 'input', 'output',
'report', 'region', 'emitter', 'scan', 'symmetry', 'model',
'report', 'region', 'emitter', 'scan', 'domain', 'model',
'log', 'debug', 'population', 'rfac'}
## @var FILE_CATEGORIES_TO_KEEP
@@ -242,37 +242,52 @@ class FileTracker(object):
else:
self._complete_models.discard(model)
def delete_files(self, categories=None):
def delete_files(self, categories=None, incomplete_models=False):
"""
delete the files matching the list of categories.
delete all files matching a set of categories.
@version this method does not act on the 'rfac' category.
this function deletes all files that are tagged with one of the given categories.
tags are set by the code sections that create the files.
for a list of common categories, see FILE_CATEGORIES.
the categories can be given as an argument or taken from the categories_to_delete property.
files are deleted regardless of R-factor.
be sure to specify only categories that you don't need in the output at all.
by default, only files of complete models (cf. set_model_complete()) are deleted
to avoid interference with running calculations.
to clean up after calculations, the incomplete_models argument can override this.
@note this method does not act on the special 'rfac' category (see delete_bad_rfac()).
@param categories: set of file categories to delete.
defaults to self.categories_to_delete.
if the argument is None, it defaults to the categories_to_delete property.
@param incomplete_models: (bool) delete files of incomplete models as well.
by default (False), incomplete models are not deleted.
@return: None
"""
if categories is None:
categories = self.categories_to_delete
for cat in categories:
self.delete_category(cat)
self.delete_category(cat, incomplete_models=incomplete_models)
def delete_bad_rfac(self, keep=0, force_delete=False):
"""
delete the files of all models except a specified number of good models.
delete all files of all models except for a specified number of best ranking models.
the method first determines which models to keep.
models with R factor values of 0.0, without a specified R-factor, and
the specified number of best ranking non-zero models are kept.
the files belonging to the keeper models are kept, all others are deleted,
regardless of category.
files of incomplete models are also kept.
in addition, incomplete models, models with R factor = 0.0,
and those without a specified R-factor are kept.
all other files are deleted.
the method does not consider the file category.
the files are deleted from the list and the file system.
files are deleted only if 'rfac' is specified in self.categories_to_delete
or if force_delete is set to True.
the method executes only if 'rfac' is specified in self.categories_to_delete
or if force_delete is True.
otherwise the method does nothing.
@param keep: number of files to keep.
@@ -330,17 +345,31 @@ class FileTracker(object):
return len(del_models)
def delete_category(self, category):
def delete_category(self, category, incomplete_models=False):
"""
delete all files of a specified category from the list and the file system.
only files of complete models (cf. set_model_complete()) are deleted, but regardless of R-factor.
this function deletes all files that are tagged with the given category.
tags are set by the code sections that create the files.
for a list of common categories, see FILE_CATEGORIES.
files are deleted regardless of R-factor.
be sure to specify only categories that you don't need in the output at all.
by default, only files of complete models (cf. set_model_complete()) are deleted
to avoid interference with running calculations.
to clean up after calculations, the incomplete_models argument can override this.
@param category: (str) category.
should be one of FILE_CATEGORIES. otherwise, the function has no effect.
@param incomplete_models: (bool) delete files of incomplete models as well.
by default (False), incomplete models are not deleted.
@return: None
"""
del_names = {name for (name, cat) in self._file_category.items() if cat == category}
if not incomplete_models:
del_names &= {name for (name, model) in self._file_model.items() if model in self._complete_models}
for name in del_names:
self.delete_file(name)
@@ -375,3 +404,33 @@ class FileTracker(object):
logger.warning("file system error deleting file {0}".format(path))
else:
logger.debug("delete file {0} ({1}, model {2})".format(path, cat, model))
def list_files_other_models(prefix, models):
"""
list input/output files except those of the given models.
this can be used to clean up all files except those belonging to the given models.
to delete the listed files:
for f in files:
os.remove(f)
@param prefix: file name prefix up to the first underscore.
only files starting with this prefix are listed.
@param models: sequence or set of model numbers that should not be listed.
@return: set of file names
"""
file_names = set([])
for entry in os.scandir():
if entry.is_file():
elements = entry.name.split('_')
try:
if len(elements) == 6 and elements[0] == prefix and int(elements[1]) not in models:
file_names.add(entry.name)
except (IndexError, ValueError):
pass
return file_names
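A hedged sketch of the clean-up usage described in the docstring of list_files_other_models(); the project prefix and the model numbers are made up, and the function scans the current working directory.
~~~~~~{.py}
import os

# assuming this module is pmsco.files (the file name is not shown in this diff)
import pmsco.files as files

# keep the files of models 5 and 7, list everything else with the given prefix
obsolete = files.list_files_other_models("myproject", {5, 7})
for name in obsolete:
    os.remove(name)
~~~~~~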

178
pmsco/graphics/cluster.py Executable file
View File

@@ -0,0 +1,178 @@
#!/usr/bin/env python
"""
@package pmsco.graphics.cluster
graphics rendering module for clusters.
this module is experimental.
interface and implementation may change without notice.
at the moment we are evaluating rendering solutions.
@author Matthias Muntwiler, matthias.muntwiler@psi.ch
@copyright (c) 2017 by Paul Scherrer Institut @n
Licensed under the Apache License, Version 2.0 (the "License"); @n
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import sys
import os
import numpy as np
import argparse
import logging
logger = logging.getLogger(__name__)
try:
import pymol2
except ImportError:
logger.warning("error importing pymol2. cluster rendering using pymol2 disabled.")
pymol2 = None
try:
from mpl_toolkits.mplot3d import Axes3D
from matplotlib.figure import Figure
from matplotlib.backends.backend_agg import FigureCanvasAgg as FigureCanvas
# from matplotlib.backends.backend_pdf import FigureCanvasPdf
# from matplotlib.backends.backend_svg import FigureCanvasSVG
except ImportError:
Axes3D = None
Figure = None
FigureCanvas = None
logger.warning("error importing matplotlib. cluster rendering using matplotlib disabled.")
def render_file(spath, view):
"""
render a cluster file to a PNG image using pymol2.
@param spath: path and name of the cluster file (any format readable by pymol).
@param view: view direction; currently not evaluated.
@return: None
"""
sname = "cluster"
opath = spath + ".png"
pm = pymol2.PyMOL()
cmd = pm.cmd
pm.start()
try:
cmd.reinitialize()
cmd.load(spath, sname)
cmd.disable("all")
cmd.enable(sname)
cmd.set("orthoscopic", 1)
cmd.bg_color("white")
cmd.show_as("spheres")
cmd.alter("all", "vdw=0.8")
#cmd.show("sticks")
#zoom selection-expression # selection to fill the viewer
#orient selection-expression # largest dim horizontal, second-largest vertical
#cmd.orient() --- should stick to fixed orientation
#cmd.turn("x", -90)
#cmd.turn("x", 0)
#cmd.turn("y", 0)
#cmd.clip("slab", 5.0)
cmd.viewport(640, 640)
cmd.zoom(complete=1)
#pymol.cmd.rebuild() #--- necessary?
cmd.png(opath)
finally:
pm.stop()
def render_cluster(clu):
pass
def set_axes_equal(ax):
"""
Make axes of 3D plot have equal scale so that spheres appear as spheres,
cubes as cubes, etc.. This is one possible solution to Matplotlib's
ax.set_aspect('equal') and ax.axis('equal') not working for 3D.
@author https://stackoverflow.com/a/31364297
@param ax: a matplotlib axis, e.g., as output from plt.gca().
"""
x_limits = ax.get_xlim3d()
y_limits = ax.get_ylim3d()
z_limits = ax.get_zlim3d()
x_range = abs(x_limits[1] - x_limits[0])
x_middle = np.mean(x_limits)
y_range = abs(y_limits[1] - y_limits[0])
y_middle = np.mean(y_limits)
z_range = abs(z_limits[1] - z_limits[0])
z_middle = np.mean(z_limits)
# The plot bounding box is a sphere in the sense of the infinity
# norm, hence I call half the max range the plot radius.
plot_radius = 0.5*max([x_range, y_range, z_range])
ax.set_xlim3d([x_middle - plot_radius, x_middle + plot_radius])
ax.set_ylim3d([y_middle - plot_radius, y_middle + plot_radius])
ax.set_zlim3d([z_middle - plot_radius, z_middle + plot_radius])
def render_xyz_matplotlib(filename, data, canvas=None):
"""
produce a graphics file from an array of 3d coordinates in the matplotlib scatter style.
the default file format is PNG.
this function requires the matplotlib module.
if it is not available, the function raises an error.
@param filename: path and name of the scan file.
this is used to derive the output file path by adding the extension of the graphics file format.
@param data: numpy array of shape (N,3).
@param canvas: a FigureCanvas class reference from a matplotlib backend.
if None, the default FigureCanvasAgg is used which produces a bitmap file in PNG format.
@return (str) path and name of the generated graphics file.
empty string if an error occurred.
@raise TypeError if matplotlib is not available.
"""
if canvas is None:
canvas = FigureCanvas
fig = Figure()
canvas(fig)
ax = fig.add_subplot(111, projection='3d')
# ax.set_aspect('equal')
try:
# method available in matplotlib 2.1 and later
ax.set_proj_type('ortho')
except AttributeError:
pass
ax.scatter(data[:, 0], data[:, 1], data[:, 2], c='r', marker='o')
ax.set_xlabel('x')
ax.set_ylabel('y')
ax.set_zlabel('z')
set_axes_equal(ax)
out_filename = "{0}.{1}".format(filename, canvas.get_default_filetype())
fig.savefig(out_filename)
return out_filename
def exec_cli():
parser = argparse.ArgumentParser()
parser.add_argument('-v', '--view', default='z')
parser.add_argument(dest='files', nargs='+')
args = parser.parse_args()
for fil in args.files:
render_file(fil, args.view)
if __name__ == '__main__':
exec_cli()
sys.exit(0)
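A minimal sketch of rendering a coordinate array with render_xyz_matplotlib(); the coordinates are made-up illustration data.
~~~~~~{.py}
import numpy as np
import pmsco.graphics.cluster as gc

# three atoms along the z axis (coordinates in Angstrom, illustrative only)
coords = np.array([[0., 0., 0.],
                   [0., 0., 2.],
                   [0., 0., 4.]])
# writes cluster_demo.png with the default Agg canvas and returns the file name
outfile = gc.render_xyz_matplotlib("cluster_demo", coords)
print(outfile)
~~~~~~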

View File

@@ -182,7 +182,7 @@ def render_results(results_file, data=None):
"""
if data is None:
data = np.genfromtxt(results_file, names=True)
data = np.atleast_1d(np.genfromtxt(results_file, names=True))
summary = evaluate_results(data)

View File

@@ -7,16 +7,13 @@ interface and implementation are subject to change.
@author Matthias Muntwiler, matthias.muntwiler@psi.ch
@copyright (c) 2018 by Paul Scherrer Institut @n
@copyright (c) 2018-21 by Paul Scherrer Institut @n
Licensed under the Apache License, Version 2.0 (the "License"); @n
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import logging
import math
import numpy as np
@@ -135,9 +132,8 @@ def render_ea_scan(filename, data, scan_mode, canvas=None, is_modf=False):
im.set_cmap("RdBu_r")
dhi = max(abs(dlo), abs(dhi))
dlo = -dhi
im.set_clim((dlo, dhi))
im.set_clim((-1., 1.))
try:
# requires matplotlib 2.1.0
ti = cb.get_ticks()
ti = [min(ti), 0., max(ti)]
cb.set_ticks(ti)
@@ -206,8 +202,10 @@ def render_tp_scan(filename, data, canvas=None, is_modf=False):
cb = fig.colorbar(pc, shrink=0.4, pad=0.1)
dlo = np.nanpercentile(data['i'], 2)
dhi = np.nanpercentile(data['i'], 98)
clip = 2
dlo = np.nanpercentile(data['i'], clip)
dhi = np.nanpercentile(data['i'], 100 - clip)
if is_modf:
pc.set_cmap("RdBu_r")
# im.set_cmap("coolwarm")
@@ -215,7 +213,6 @@ def render_tp_scan(filename, data, canvas=None, is_modf=False):
dlo = -dhi
pc.set_clim((dlo, dhi))
try:
# requires matplotlib 2.1.0
ti = cb.get_ticks()
ti = [min(ti), 0., max(ti)]
cb.set_ticks(ti)
@@ -226,9 +223,12 @@ def render_tp_scan(filename, data, canvas=None, is_modf=False):
# im.set_cmap("inferno")
# im.set_cmap("viridis")
pc.set_clim((dlo, dhi))
try:
ti = cb.get_ticks()
ti = [min(ti), max(ti)]
cb.set_ticks(ti)
except AttributeError:
pass
out_filename = "{0}.{1}".format(filename, canvas.get_default_filetype())
fig.savefig(out_filename)

View File

@@ -0,0 +1,210 @@
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import logging
import math
import numpy as np
import scipy.interpolate
import scipy.special
logger = logging.getLogger(__name__)
try:
from matplotlib.figure import Figure
from matplotlib.backends.backend_agg import FigureCanvasAgg as FigureCanvas
except ImportError:
Figure = None
FigureCanvas = None
logger.warning("error importing matplotlib. graphics rendering disabled.")
class TMatrix(object):
"""
scattering T-matrix (tl coefficients) versus kinetic energy.
"""
def __init__(self):
"""
initialize an empty T-matrix.
self.en.shape = (n_e,) holds the kinetic energies in eV.
self.tl.shape = (n_e, n_l) holds the complex tl coefficients.
"""
self.en = None
self.tl = None
def load_test_data(self):
self.en = np.array([100.])
raw = [-0.052845, -0.003238, 0.478705, 0.672581, 0.137932, 0.981700, 0.323890, 0.805299, 0.291814, 0.776792,
0.369416, 0.351845, 0.199775, 0.113314, 0.062479, 0.025691, 0.013699, 0.005283]
re_tl = np.array(raw[0::2])
im_tl = np.array(raw[1::2])
self.tl = np.atleast_2d(re_tl + 1j * im_tl)  # keep the documented (n_e, n_l) shape
def load_edac_scattering(self, f, energy=math.nan):
"""
load T matrix from EDAC scattering file
currently, only the 'tl' format is supported.
@param f: file path
@param energy: kinetic energy in eV if none is defined in the file
@return: None
"""
with open(f, "r") as fi:
h = fi.readline().rstrip().split(' ')
ne = int(h[0])
if ne > 1:
assert h[1] == 'E(eV)'
del h[1]
lmax = int(h[1])
assert h[2] == 'regular'
assert h[3] == 'tl'
self.load_edac_tl(f, ne, lmax, energy=energy)
def load_edac_tl(self, f, ne, lmax, energy=math.nan):
"""
load T matrix from EDAC scattering file in 'tl' format
@param f: file path
@param ne: number of energies (rows)
@param lmax: maximum l number (columns = 2 * (lmax + 1))
@param energy: kinetic energy in eV if none is defined in the file
@return: None
"""
if ne > 1:
self.en = np.atleast_1d(np.genfromtxt(f, skip_header=1, usecols=[0]))
start_col = 1
else:
self.en = np.atleast_1d(np.asarray(energy, dtype=float))
start_col = 0
re_cols = range(start_col, start_col + (lmax + 1) * 2, 2)
im_cols = range(start_col + 1, start_col + (lmax + 1) * 2, 2)
# atleast_2d preserves the documented (n_e, n_l) shape also for single-energy files
re_tl = np.atleast_2d(np.genfromtxt(f, skip_header=1, usecols=re_cols))
im_tl = np.atleast_2d(np.genfromtxt(f, skip_header=1, usecols=im_cols))
self.tl = re_tl + 1j * im_tl
assert self.tl.shape == (ne, lmax + 1), "array shape mismatch"
def planewave_amplitude(self, energy, angle):
"""
total, complex plane wave scattering amplitude for given energy and angle
@param energy: kinetic energy in eV.
this can be a numeric value, a 1-dimensional numpy.ndarray,
or any value accepted by the numpy.asarray function.
@param angle: scattering angle in degrees (0..180).
this can be a numeric value, a 1-dimensional numpy.ndarray,
or any value accepted by the numpy.asarray function.
@return: 3 numpy arrays (amp, magnitude, phase) representing the scattering amplitude
versus energy and angle.
the shape of the three arrays is (n_energies, n_angles).
@arg amp: complex scattering amplitude.
@arg magnitude: magnitude (absolute value) of the scattering amplitude.
@arg phase: phase angle in radians of the scattering amplitude.
"""
if not isinstance(energy, np.ndarray):
energy = np.atleast_1d(np.asarray(energy))
ne = len(energy)
if not isinstance(angle, np.ndarray):
angle = np.atleast_1d(np.array(angle))
na = len(angle)
kinv = 1. / (0.513019932 * np.sqrt(energy))
f_tl = scipy.interpolate.interp1d(self.en, self.tl, axis=0, copy=False)
tl = f_tl(energy)
cos_angle = np.cos(np.radians(angle))
lmax = self.tl.shape[1] - 1
l = np.arange(0, lmax + 1)
amp = np.zeros((ne, na), dtype=complex)
for ia, ca in enumerate(cos_angle):
lpmn, __ = scipy.special.lpmn(0, lmax, ca)
fpart = np.outer(kinv, (2 * l + 1) * lpmn[0]) * tl
ftot = np.sum(fpart, axis=-1)
amp[:, ia] = ftot
mag = np.abs(amp)
pha = np.angle(amp)
return amp, mag, pha
def render_scattering_1d(filename, tmatrix, energy=None):
if energy is None:
en = tmatrix.en[0]
else:
en = energy
an = np.arange(0, 181, 2)
__, mag, pha = tmatrix.planewave_amplitude(en, an)
pha = pha / math.pi
canvas = FigureCanvas
fig = Figure()
canvas(fig)
ax = fig.add_subplot(211)
ax.plot(an, mag[0])
ax.set_xlabel('th (deg)')
ax.set_ylabel('mag (arb)')
ax = fig.add_subplot(212)
ax.plot(an, pha[0])
ax.set_xlabel('th (deg)')
ax.set_ylabel('pha (1/pi)')
out_filename = "{0}.{1}".format(filename, canvas.get_default_filetype())
fig.savefig(out_filename)
return out_filename
def render_scattering_2d(filename, tmatrix):
en = tmatrix.en
an = np.arange(0, 181, 2)
__, mag, pha = tmatrix.planewave_amplitude(en, an)
pha = pha / math.pi
canvas = FigureCanvas
fig = Figure()
canvas(fig)
ax = fig.add_subplot(211)
im = ax.imshow(mag, origin='lower', aspect='auto', interpolation='none')
im.set_extent((an[0], an[-1], en[0], en[-1]))
im.set_cmap("magma")
ax.set_xlabel('th (deg)')
ax.set_ylabel('E (eV)')
# cb = ax.colorbar(im, shrink=0.4, pad=0.1)
# ti = cb.get_ticks()
# ti = [0., max(ti)]
# cb.set_ticks(ti)
ax = fig.add_subplot(212)
im = ax.imshow(pha, origin='lower', aspect='auto', interpolation='none')
im.set_extent((an[0], an[-1], en[0], en[-1]))
im.set_cmap("RdBu_r")
ax.set_xlabel('th (deg)')
ax.set_ylabel('E (eV)')
# cb = ax.colorbar(im, shrink=0.4, pad=0.1)
dlo = np.nanpercentile(mag, 2)
dhi = np.nanpercentile(mag, 98)
dhi = max(abs(dlo), abs(dhi))
dlo = -dhi
im.set_clim((dlo, dhi))
# ti = cb.get_ticks()
# ti = [min(ti), 0., max(ti)]
# cb.set_ticks(ti)
out_filename = "{0}.{1}".format(filename, canvas.get_default_filetype())
fig.savefig(out_filename)
return out_filename
def render_scattering_map(filename, energy):
tmatrix = TMatrix()
tmatrix.load_edac_scattering(filename, energy)
if tmatrix.tl.shape[0] == 1:
out_filename = render_scattering_1d(filename, tmatrix)
else:
out_filename = render_scattering_2d(filename, tmatrix)
return out_filename
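As a hedged illustration of the scattering-map rendering above; the import path is a guess (the file name of this module is not shown in the diff), and scatterer01.tl stands for any EDAC scattering file in 'tl' format.
~~~~~~{.py}
# adjust the import to the actual location of this module
from pmsco.graphics import scattering as sc

# the energy argument is only used for single-energy files without an energy column
outfile = sc.render_scattering_map("scatterer01.tl", energy=500.)
print(outfile)
~~~~~~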

View File

@@ -1,6 +1,6 @@
"""
@package pmsco.handlers
project-independent task handlers for models, scans, symmetries, emitters and energies.
project-independent task handlers for models, scans, domains, emitters and energies.
calculation tasks are organized in a hierarchical tree.
at each node, a task handler (feel free to find a better name)
@@ -20,9 +20,9 @@ the handlers of the structural optimizers are declared in separate modules.
scans are defined by the project.
the actual merging step from multiple scans into one result dataset is delegated to the project class.
<em>symmetry handlers</em> split a task into one child per symmetry.
symmetries are defined by the project.
the actual merging step from multiple symmetries into one result dataset is delegated to the project class.
<em>domain handlers</em> split a task into one child per domain.
domains are defined by the project.
the actual merging step from multiple domains into one result dataset is delegated to the project class.
<em>emitter handlers</em> split a task into one child per emitter configuration (inequivalent sets of emitting atoms).
emitter configurations are defined by the project.
@@ -35,31 +35,28 @@ code inspection and tests have shown that per-emitter results from EDAC can be s
in order to take advantage of parallel processing.
while several classes of model handlers are available,
the default handlers for scans, symmetries, emitters and energies should be sufficient in most situations.
the scan and symmetry handlers call methods of the project class to invoke project-specific functionality.
the default handlers for scans, domains, emitters and energies should be sufficient in most situations.
the scan and domain handlers call methods of the project class to invoke project-specific functionality.
@author Matthias Muntwiler, matthias.muntwiler@psi.ch
@copyright (c) 2015-18 by Paul Scherrer Institut @n
@copyright (c) 2015-21 by Paul Scherrer Institut @n
Licensed under the Apache License, Version 2.0 (the "License"); @n
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import datetime
from functools import reduce
import logging
import math
import numpy as np
import os
from pathlib import Path
from pmsco.compat import open
import pmsco.data as md
import pmsco.dispatch as dispatch
import pmsco.graphics.scan as mgs
from pmsco.helpers import BraceMessage as BMsg
@@ -127,10 +124,14 @@ class TaskHandler(object):
for best efficiency the number of tasks generated should be greater or equal the number of slots.
it should not exceed N times the number of slots, where N is a reasonably small number.
@return None
@return (int) number of children that create_tasks() will generate on average.
the number does not need to be accurate; a rough estimate, or the order of magnitude if it exceeds about 10, is fine.
it is used to distribute processing slots across task levels.
see pmsco.dispatch.MscoMaster.setup().
"""
self._project = project
self._slots = slots
return 1
def cleanup(self):
"""
@@ -372,8 +373,8 @@ class SingleModelHandler(ModelHandler):
keys = [key for key in self.result]
keys.sort(key=lambda t: t[0].lower())
vals = (str(self.result[key]) for key in keys)
filename = self._project.output_file + ".dat"
with open(filename, "w") as outfile:
filename = Path(self._project.output_file).with_suffix(".dat")
with open(filename, "wt", encoding="latin1") as outfile:
outfile.write("# ")
outfile.write(" ".join(keys))
outfile.write("\n")
@@ -416,6 +417,8 @@ class ScanHandler(TaskHandler):
def setup(self, project, slots):
"""
initialize the scan task handler and save processed experimental scans.
@return (int) number of scans defined in the project.
"""
super(ScanHandler, self).setup(project, slots)
@@ -430,13 +433,15 @@ class ScanHandler(TaskHandler):
if project.combined_scan is not None:
ext = md.format_extension(project.combined_scan)
filename = project.output_file + ext
filename = Path(project.output_file).with_suffix(ext)
md.save_data(filename, project.combined_scan)
if project.combined_modf is not None:
ext = md.format_extension(project.combined_modf)
filename = project.output_file + ".modf" + ext
filename = Path(project.output_file).with_suffix(".modf" + ext)
md.save_data(filename, project.combined_modf)
return len(self._project.scans)
def create_tasks(self, parent_task):
"""
generate a calculation task for each scan of the given parent task.
@@ -526,7 +531,7 @@ class ScanHandler(TaskHandler):
return None
class SymmetryHandler(TaskHandler):
class DomainHandler(TaskHandler):
## @var _pending_ids_per_parent
# (dict) sets of child task IDs per parent
#
@@ -546,20 +551,29 @@ class SymmetryHandler(TaskHandler):
# the values are sets of all child CalculationTask.id belonging to the parent.
def __init__(self):
super(SymmetryHandler, self).__init__()
super(DomainHandler, self).__init__()
self._pending_ids_per_parent = {}
self._complete_ids_per_parent = {}
def setup(self, project, slots):
"""
initialize the domain task handler.
@return (int) number of domains defined in the project.
"""
super(DomainHandler, self).setup(project, slots)
return len(self._project.domains)
def create_tasks(self, parent_task):
"""
generate a calculation task for each symmetry of the given parent task.
generate a calculation task for each domain of the given parent task.
all symmetries share the same model parameters.
all domains share the same model parameters.
@return list of CalculationTask objects, with one element per symmetry.
the symmetry index varies according to project.symmetries.
@return list of CalculationTask objects, with one element per domain.
the domain index varies according to project.domains.
"""
super(SymmetryHandler, self).create_tasks(parent_task)
super(DomainHandler, self).create_tasks(parent_task)
parent_id = parent_task.id
self._parent_tasks[parent_id] = parent_task
@@ -567,10 +581,10 @@ class SymmetryHandler(TaskHandler):
self._complete_ids_per_parent[parent_id] = set()
out_tasks = []
for (i_sym, sym) in enumerate(self._project.symmetries):
for (i_dom, domain) in enumerate(self._project.domains):
new_task = parent_task.copy()
new_task.parent_id = parent_id
new_task.change_id(sym=i_sym)
new_task.change_id(domain=i_dom)
child_id = new_task.id
self._pending_tasks[child_id] = new_task
@@ -579,25 +593,25 @@ class SymmetryHandler(TaskHandler):
out_tasks.append(new_task)
if not out_tasks:
logger.error("no symmetry tasks generated. your project must declare at least one symmetry.")
logger.error("no domain tasks generated. your project must declare at least one domain.")
return out_tasks
def add_result(self, task):
"""
collect and combine the calculation results versus symmetry.
collect and combine the calculation results versus domain.
* mark the task as complete
* store its result for later
* check whether this was the last pending task of the family (belonging to the same parent).
the actual merging of data is delegated to the project's combine_symmetries() method.
the actual merging of data is delegated to the project's combine_domains() method.
@param task: (CalculationTask) calculation task that completed.
@return parent task (CalculationTask) if the family is complete. None if the family is not complete yet.
"""
super(SymmetryHandler, self).add_result(task)
super(DomainHandler, self).add_result(task)
self._complete_tasks[task.id] = task
del self._pending_tasks[task.id]
@@ -607,7 +621,7 @@ class SymmetryHandler(TaskHandler):
family_pending.remove(task.id)
family_complete.add(task.id)
# all symmetries complete?
# all domains complete?
if len(family_pending) == 0:
parent_task = self._parent_tasks[task.parent_id]
@@ -624,7 +638,7 @@ class SymmetryHandler(TaskHandler):
parent_task.time = reduce(lambda a, b: a + b, child_times)
if parent_task.result_valid:
self._project.combine_symmetries(parent_task, child_tasks)
self._project.combine_domains(parent_task, child_tasks)
self._project.evaluate_result(parent_task, child_tasks)
self._project.files.add_file(parent_task.result_filename, parent_task.id.model, 'scan')
self._project.files.add_file(parent_task.modf_filename, parent_task.id.model, 'scan')
@@ -669,6 +683,19 @@ class EmitterHandler(TaskHandler):
self._pending_ids_per_parent = {}
self._complete_ids_per_parent = {}
def setup(self, project, slots):
"""
initialize the emitter task handler.
@return (int) estimated number of emitter configurations that the cluster generator will generate.
the estimate is based on the start parameters, scan 0 and domain 0.
"""
super(EmitterHandler, self).setup(project, slots)
mock_model = self._project.model_space.start
mock_index = dispatch.CalcID(-1, 0, 0, -1, -1)
n_emitters = project.cluster_generator.count_emitters(mock_model, mock_index)
return n_emitters
def create_tasks(self, parent_task):
"""
generate a calculation task for each emitter configuration of the given parent task.
@@ -750,11 +777,11 @@ class EmitterHandler(TaskHandler):
if parent_task.result_valid:
self._project.combine_emitters(parent_task, child_tasks)
self._project.evaluate_result(parent_task, child_tasks)
self._project.files.add_file(parent_task.result_filename, parent_task.id.model, 'symmetry')
self._project.files.add_file(parent_task.modf_filename, parent_task.id.model, 'symmetry')
self._project.files.add_file(parent_task.result_filename, parent_task.id.model, 'domain')
self._project.files.add_file(parent_task.modf_filename, parent_task.id.model, 'domain')
graph_file = mgs.render_scan(parent_task.modf_filename,
ref_data=self._project.scans[parent_task.id.scan].modulation)
self._project.files.add_file(graph_file, parent_task.id.model, 'symmetry')
self._project.files.add_file(graph_file, parent_task.id.model, 'domain')
del self._pending_ids_per_parent[parent_task.id]
del self._complete_ids_per_parent[parent_task.id]
@@ -921,7 +948,7 @@ class EnergyRegionHandler(RegionHandler):
@param slots (int) number of calculation slots (processes).
@return None
@return (int) average number of child tasks
"""
super(EnergyRegionHandler, self).setup(project, slots)
@@ -934,6 +961,8 @@ class EnergyRegionHandler(RegionHandler):
logger.debug(BMsg("region handler: split scan {file} into {slots} chunks",
file=os.path.basename(scan.filename), slots=self._slots_per_scan[i]))
return max(int(sum(self._slots_per_scan) / len(self._slots_per_scan)), 1)
def create_tasks(self, parent_task):
"""
generate a calculation task for each energy region of the given parent task.
@@ -972,27 +1001,3 @@ class EnergyRegionHandler(RegionHandler):
logger.error("no region tasks generated. this is probably a bug.")
return out_tasks
def choose_region_handler_class(project):
"""
choose a suitable region handler for the project.
the function returns the EnergyRegionHandler class
if the project includes an energy scan with at least 10 steps.
Otherwise, it returns the SingleRegionHandler.
angle scans do not benefit from region splitting in EDAC.
@param project: Project instance.
@return: SingleRegionHandler or EnergyRegionHandler class.
"""
energy_scans = 0
for scan in project.scans:
if scan.energies.shape[0] >= 10:
energy_scans += 1
if energy_scans >= 1:
return EnergyRegionHandler
else:
return SingleRegionHandler

View File

@@ -6,6 +6,13 @@ a collection of small and generic code bits mostly collected from the www.
"""
import contextlib
import ctypes
import io
import os
import sys
from typing import BinaryIO
class BraceMessage(object):
"""
@@ -22,3 +29,40 @@ class BraceMessage(object):
def __str__(self):
return self.fmt.format(*self.args, **self.kwargs)
libc = ctypes.CDLL(None)
c_stdout = ctypes.c_void_p.in_dll(libc, 'stdout')
@contextlib.contextmanager
def stdout_redirected(dest_file: BinaryIO):
"""
A context manager to temporarily redirect stdout to a file.
Redirects all standard output from Python and the C library to the specified file.
This can be used, e.g., to capture output from Fortran code.
credit: https://eli.thegreenplace.net/2015/redirecting-all-kinds-of-stdout-in-python/
@param dest_file: binary file open for writing ('wb' mode).
Only the object's fileno() method is used.
@return: None
"""
original_stdout_fd = sys.stdout.fileno()
def _redirect_stdout(to_fd):
"""Redirect stdout to the given file descriptor."""
libc.fflush(c_stdout)
sys.stdout.close()
os.dup2(to_fd, original_stdout_fd)
sys.stdout = io.TextIOWrapper(os.fdopen(original_stdout_fd, 'wb'))
saved_stdout_fd = os.dup(original_stdout_fd)
try:
_redirect_stdout(dest_file.fileno())
yield
finally:
# restore the original stdout even if the body raised an exception
_redirect_stdout(saved_stdout_fd)
os.close(saved_stdout_fd)
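A minimal usage sketch of the stdout_redirected() context manager defined above; the log file name is illustrative.
~~~~~~{.py}
import pmsco.helpers as helpers

# capture Python- and C-level stdout (e.g. output of compiled calculator code) in a file
with open("calc.log", "wb") as f:
    with helpers.stdout_redirected(f):
        print("this line ends up in calc.log")
~~~~~~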

View File

@@ -6,21 +6,15 @@ this module provides functions for loading/saving pmsco data in igor pro.
@author Matthias Muntwiler
@copyright (c) 2019 by Paul Scherrer Institut @n
@copyright (c) 2019-23 by Paul Scherrer Institut @n
Licensed under the Apache License, Version 2.0 (the "License"); @n
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
from pmsco.compat import open
def _escape_igor_string(s):
s = s.replace('\\', '\\\\')
@@ -91,7 +85,7 @@ class IgorExport(object):
"""
write to igor file.
"""
with open(filename, 'w') as f:
with open(filename, 'wt', encoding="utf8") as f:
self._write_header(f)
self._write_data(f)

View File

@@ -1 +0,0 @@
__author__ = 'matthias muntwiler'

View File

@@ -1,74 +0,0 @@
SHELL=/bin/sh
# makefile for the LOESS module
#
# required libraries: libblas, liblapack, libf2c
# (you may have to set soft links so that linker finds them)
#
# the makefile calls python-config to get the compilation flags and include path.
# you may override the corresponding variables on the command line or by environment variables:
#
# PYTHON_INC: specify additional include directories. each dir must start with -I prefix.
# PYTHON_CFLAGS: specify the C compiler flags.
#
# see the top-level makefile for additional information.
.SUFFIXES:
.SUFFIXES: .c .cpp .cxx .exe .f .h .i .o .py .pyf .so .x
.PHONY: all loess test gas madeup ethanol air galaxy
OBJ=loessc.o loess.o predict.o misc.o loessf.o dqrsl.o dsvdc.o fix_main.o
FFLAGS?=-O
LIB=-lblas -lm -lf2c
LIBPATH?=
CC?=gcc
CCOPTS?=
SWIG?=swig
SWIGOPTS?=
PYTHON?=python
PYTHONOPTS?=
PYTHON_CONFIG = ${PYTHON}-config
#PYTHON_LIB ?= $(shell ${PYTHON_CONFIG} --libs)
#PYTHON_INC ?= $(shell ${PYTHON_CONFIG} --includes)
PYTHON_INC ?=
PYTHON_CFLAGS ?= $(shell ${PYTHON_CONFIG} --cflags)
#PYTHON_LDFLAGS ?= $(shell ${PYTHON_CONFIG} --ldflags)
all: loess
loess: _loess.so
loess.py _loess.so: loess.c loess.i
$(PYTHON) $(PYTHONOPTS) setup.py build_ext --inplace
examples: gas madeup ethanol air galaxy
gas: gas.x
gas.x: gas.o $(OBJ)
$(CC) -o gas.x gas.o $(OBJ) $(LIB)
madeup: madeup.x
madeup.x: madeup.o $(OBJ)
$(CC) -o madeup.x madeup.o $(OBJ) $(LIB)
ethanol: ethanol.x
ethanol.x: ethanol.o $(OBJ)
$(CC) -o ethanol.x ethanol.o $(OBJ) $(LIB)
air: air.x
air.x: air.o $(OBJ)
$(CC) -o air.x air.o $(OBJ) $(LIB)
galaxy: galaxy.x
galaxy.x: galaxy.o $(OBJ)
$(CC) -o galaxy.x galaxy.o $(OBJ) $(LIB)
clean:
rm -f *.o *.so *.x core *.pyc
rm -f loess.py loess_wrap.c

View File

@@ -1,63 +0,0 @@
#!/usr/bin/env python
"""
@package loess.setup
setup.py file for LOESS
the LOESS code included here was developed at Bell Labs by
William S. Cleveland, Eric Grosse, Ming-Jen Shyu,
and is dated 18 August 1992.
the code is available in the public domain
from http://www.netlib.org/a/dloess.
see the README file for details.
the Python wrapper was set up by M. Muntwiler
with the help of the SWIG toolkit
and other incredible goodies available in the Linux world.
@bug numpy.distutils.build_src in python 2.7 passes all Fortran files to f2py,
so that they are compiled via both f2py and swig.
this produces extra object files which cause the linker to fail.
to fix this issue, this module hacks the build_src class.
this hack does not work with python 3. perhaps it's even unnecessary.
@author Matthias Muntwiler
@copyright (c) 2015-18 by Paul Scherrer Institut @n
Licensed under the Apache License, Version 2.0 (the "License"); @n
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
"""
import numpy
try:
numpy_include = numpy.get_include()
except AttributeError:
numpy_include = numpy.get_numpy_include()
def configuration(parent_package='', top_path=None):
from numpy.distutils.misc_util import Configuration
config = Configuration('loess', parent_package, top_path)
lib = ['blas', 'm', 'f2c']
src = ['loess.c', 'loessc.c', 'predict.c', 'misc.c', 'loessf.f', 'dqrsl.f', 'dsvdc.f', 'fix_main.c', 'loess.i']
inc_dir = [numpy_include]
config.add_extension('_loess',
sources=src,
libraries=lib,
include_dirs=inc_dir
)
return config
def ignore_sources(self, sources, extension):
return sources
if __name__ == '__main__':
try:
from numpy.distutils.core import numpy_cmdclass
numpy_cmdclass['build_src'].f2py_sources = ignore_sources
except ImportError:
pass
from numpy.distutils.core import setup
setup(**configuration(top_path='').todict())

View File

@@ -1,38 +0,0 @@
SHELL=/bin/sh
# makefile for external programs and modules
#
# see the top-level makefile for additional information.
.PHONY: all clean edac loess msc mufpot phagen
EDAC_DIR = edac
MSC_DIR = msc
MUFPOT_DIR = mufpot
LOESS_DIR = loess
PHAGEN_DIR = calculators/phagen
all: edac loess
edac:
$(MAKE) -C $(EDAC_DIR)
loess:
$(MAKE) -C $(LOESS_DIR)
msc:
$(MAKE) -C $(MSC_DIR)
mufpot:
$(MAKE) -C $(MUFPOT_DIR)
phagen:
$(MAKE) -C $(PHAGEN_DIR)
clean:
$(MAKE) -C $(EDAC_DIR) clean
$(MAKE) -C $(LOESS_DIR) clean
$(MAKE) -C $(MSC_DIR) clean
$(MAKE) -C $(MUFPOT_DIR) clean
$(MAKE) -C $(PHAGEN_DIR) clean
rm -f *.pyc

View File

@@ -1 +0,0 @@
revision.f

Some files were not shown because too many files have changed in this diff.