public distro 2.1.0

This commit is contained in:
muntwiler_m 2019-07-19 12:54:54 +02:00
parent acea809e4e
commit fbd2d4fa8c
40 changed files with 2813 additions and 345 deletions

136
bin/pmsco.ra-git.template Normal file
View File

@ -0,0 +1,136 @@
#!/bin/bash
#
# Slurm script template for PMSCO calculations on the Ra cluster
# based on run_mpi_HPL_nodes-2.sl by V. Markushin 2016-03-01
#
# this version works on a private copy of the source code
# which the submission script (qpmsco.ra-git.sh) has cloned from a git repository
# into the 'pmsco' sub-directory of the working directory.
# the job compiles that copy, runs the calculation and removes the copy afterwards.
# this is to minimize conflicts between different jobs
# but requires that each job has its own git commit.
#
# Use:
# - enter the appropriate parameters and save as a new file.
# - call the sbatch command to pass the job script.
# request a specific number of nodes and tasks.
# example:
# sbatch --nodes=2 --ntasks-per-node=24 --time=02:00:00 run_pmsco.sl
# the qpmsco script does all this for you.
#
# PMSCO arguments
# copy this template to a new file, and set the arguments.
# the submission script fills in the placeholders below with sed;
# a placeholder is the variable name prefixed by an underscore.
#
# PMSCO_WORK_DIR
# path to be used as working directory.
# contains the script derived from this template
# and a copy of the pmsco code in the 'pmsco' directory.
# receives output and temporary files.
#
# PMSCO_PROJECT_FILE
# python module that declares the project and starts the calculation.
# must include the file path relative to $PMSCO_WORK_DIR.
#
# PMSCO_OUT
# name of output file. should not include a path.
#
# all paths are relative to $PMSCO_WORK_DIR or (better) absolute.
#
#
# Further arguments
#
# PMSCO_JOBNAME (required)
# the job name is the base name for output files.
#
# PMSCO_WALLTIME_HR (integer, required)
# wall time limit in hours. must be integer, minimum 1.
# this value is passed to PMSCO.
# it should specify the same amount of wall time as requested from the scheduler.
#
# PMSCO_PROJECT_ARGS (optional)
# extra arguments that are parsed by the project module.
#
# PMSCO_SCAN_FILES, PMSCO_LOGLEVEL (optional)
# not assigned in this template.
# they are passed to PMSCO only if they are defined in the environment of the job.
#
# NOTE: the SBATCH directives must stay in front of the first executable line.
#
#SBATCH --job-name="_PMSCO_JOBNAME"
#SBATCH --output="_PMSCO_JOBNAME.o.%j"
#SBATCH --error="_PMSCO_JOBNAME.e.%j"

PMSCO_WORK_DIR="_PMSCO_WORK_DIR"
PMSCO_JOBNAME="_PMSCO_JOBNAME"
PMSCO_WALLTIME_HR=_PMSCO_WALLTIME_HR
PMSCO_PROJECT_FILE="_PMSCO_PROJECT_FILE"
PMSCO_OUT="_PMSCO_JOBNAME"
PMSCO_PROJECT_ARGS="_PMSCO_PROJECT_ARGS"

module load psi-python36/4.4.0
module load gcc/4.8.5
module load openmpi/3.1.3
source activate pmsco3

echo '================================================================================'
echo "=== Running $0 at the following time and place:"
date
/bin/hostname
# stop right here if the working directory is not accessible.
# otherwise, the build and the final clean-up would operate on the wrong directory.
cd "$PMSCO_WORK_DIR" || exit 1
# from now on, use the absolute path so that later cd commands work
# even if the working directory was given as a relative path.
PMSCO_WORK_DIR="$(pwd)"
pwd
ls -lA

#the intel compiler is currently not compatible with mpi4py. -mm 170131
#echo
#echo '================================================================================'
#echo "=== Setting the environment to use Intel Cluster Studio XE 2016 Update 2 intel/16.2:"
#cmd="source /opt/psi/Programming/intel/16.2/bin/compilervars.sh intel64"
#echo $cmd
#$cmd

echo
echo '================================================================================'
echo "=== The environment is set as following:"
env

# smoke test of the MPI environment: print the host name of every task
echo
echo '================================================================================'
echo "BEGIN test"
which mpirun
cmd="mpirun /bin/hostname"
echo $cmd
$cmd
echo "END test"

echo
echo '================================================================================'
echo "BEGIN mpirun pmsco"
echo

# compile the private copy of the code
cd "$PMSCO_WORK_DIR" || exit 1
cd pmsco || exit 1
echo "code revision"
git log --pretty=tformat:'%h %ai %d' -1
make -C pmsco all
python -m compileall pmsco
python -m compileall projects
echo

# assemble the PMSCO command line.
# each option is prepended, project arguments come last.
cd "$PMSCO_WORK_DIR" || exit 1
PMSCO_CMD="python pmsco/pmsco $PMSCO_PROJECT_FILE"
PMSCO_ARGS="$PMSCO_PROJECT_ARGS"
if [ -n "$PMSCO_SCAN_FILES" ]; then
    PMSCO_ARGS="-s $PMSCO_SCAN_FILES $PMSCO_ARGS"
fi
if [ -n "$PMSCO_OUT" ]; then
    PMSCO_ARGS="-o $PMSCO_OUT $PMSCO_ARGS"
fi
if [ "$PMSCO_WALLTIME_HR" -ge 1 ]; then
    PMSCO_ARGS="-t $PMSCO_WALLTIME_HR $PMSCO_ARGS"
fi
if [ -n "$PMSCO_LOGLEVEL" ]; then
    PMSCO_ARGS="--log-level $PMSCO_LOGLEVEL --log-file $PMSCO_JOBNAME.log $PMSCO_ARGS"
fi

# Do not use the OpenMPI specific options, like "-x LD_LIBRARY_PATH", with the Intel mpirun.
# the command is deliberately expanded unquoted so that it is split into words.
cmd="mpirun $PMSCO_CMD $PMSCO_ARGS"
echo $cmd
$cmd
echo "END mpirun pmsco"
echo '================================================================================'

# remove the private copy of the code.
# the rm command is chained to cd so that it can never run in another directory.
cd "$PMSCO_WORK_DIR" && rm -rf pmsco
date
ls -lAtr
echo '================================================================================'
exit 0

View File

@ -75,10 +75,10 @@ PMSCO_OUT="_PMSCO_JOBNAME"
PMSCO_LOGLEVEL="_PMSCO_LOGLEVEL"
PMSCO_PROJECT_ARGS="_PMSCO_PROJECT_ARGS"
module load psi-python27/2.4.1
module load psi-python36/4.4.0
module load gcc/4.8.5
module load openmpi/1.10.2
source activate pmsco
module load openmpi/3.1.3
source activate pmsco3
echo '================================================================================'
echo "=== Running $0 at the following time and place:"

145
bin/qpmsco.ra-git.sh Executable file
View File

@ -0,0 +1,145 @@
#!/bin/sh
#
# submission script for PMSCO calculations on the Ra cluster
#
# this version clones the current git repository at HEAD to the work directory.
# thus, version conflicts between jobs are avoided.
#
# steps:
# 1. parse and check the command line arguments.
# 2. create the job directory $DESTDIR/$JOBNAME.
# 3. clone the code into the 'pmsco' sub-directory of the job directory.
# 4. generate the job script from the pmsco.ra-git.template file.
# 5. submit the job script with sbatch (unless NOSUB is specified).
#

# print the command line help to standard output.
usage() {
    echo "Usage: $0 [NOSUB] GIT_TAG DESTDIR JOBNAME NODES TASKS_PER_NODE WALLTIME:HOURS PROJECT [ARGS [ARGS [...]]]"
    echo ""
    echo " NOSUB (optional): do not submit the script to the queue. default: submit."
    echo " GIT_TAG: git tag or branch name of the code. HEAD for current code."
    echo " DESTDIR: destination directory. must exist. a sub-dir \$JOBNAME is created."
    echo " JOBNAME (text): name of job. use only alphanumeric characters, no spaces."
    echo " NODES (integer): number of computing nodes. (1 node = 24 or 32 processors)."
    echo " do not specify more than 2."
    echo " TASKS_PER_NODE (integer): 1...24, or 32."
    echo " 24 or 32 for full-node allocation."
    echo " 1...23 for shared node allocation."
    echo " WALLTIME:HOURS (integer): requested wall time."
    echo " 1...24 for day partition"
    echo " 24...192 for week partition"
    echo " 1...192 for shared partition"
    echo " PROJECT: python module (file path) that declares the project and starts the calculation."
    echo " ARGS (optional): any number of further PMSCO or project arguments (except time)."
    echo ""
    echo "the job script is written to \$DESTDIR/\$JOBNAME which is also the destination of calculation output."
}

# abort with a message if argument $2 (named $1) is not a non-negative integer.
require_int() {
    case "$2" in
        ''|*[!0-9]*)
            echo "$0: $1 must be a non-negative integer, got '$2'" >&2
            exit 1
            ;;
    esac
}

# escape a string for use as the replacement part of a sed 's:...:...:' command.
# backslash, ampersand and the delimiter (colon) are special in the replacement.
sed_escape() {
    printf '%s' "$1" | sed -e 's/[\\&:]/\\&/g'
}

# location of the pmsco package is derived from the path of this script
SCRIPTDIR="$(dirname "$(readlink -f "$0")")"
SOURCEDIR="$(readlink -f "$SCRIPTDIR/..")"
PMSCO_SOURCE_DIR="$SOURCEDIR"

# read arguments
# note: the POSIX test operator for string equality is '=', not '=='.
if [ "$1" = "NOSUB" ]; then
    NOSUB="true"
    shift
else
    NOSUB="false"
fi

# seven positional arguments are required after the optional NOSUB
if [ $# -lt 7 ]; then
    usage
    exit 1
fi

if [ "$1" = "HEAD" ]; then
    BRANCH_ARG=""
else
    BRANCH_ARG="-b $1"
fi
shift

DEST_DIR="$1"
shift

PMSCO_JOBNAME="$1"
shift

PMSCO_NODES="$1"
PMSCO_TASKS_PER_NODE="$2"
require_int NODES "$PMSCO_NODES"
require_int TASKS_PER_NODE "$PMSCO_TASKS_PER_NODE"
PMSCO_TASKS=$(expr $PMSCO_NODES \* $PMSCO_TASKS_PER_NODE)
shift 2

PMSCO_WALLTIME_HR="$1"
require_int WALLTIME "$PMSCO_WALLTIME_HR"
PMSCO_WALLTIME_MIN=$(expr $PMSCO_WALLTIME_HR \* 60)
shift

# select partition
if [ "$PMSCO_WALLTIME_HR" -ge 25 ]; then
    PMSCO_PARTITION="week"
else
    PMSCO_PARTITION="day"
fi
if [ "$PMSCO_TASKS_PER_NODE" -lt 24 ]; then
    PMSCO_PARTITION="shared"
fi

# the project path is resolved before changing the directory
# so that a relative path refers to the directory of the caller.
PMSCO_PROJECT_FILE="$(readlink -f "$1")"
shift

PMSCO_PROJECT_ARGS="$*"

# set up working directory
cd "$DEST_DIR" || exit 1
if [ ! -d "$PMSCO_JOBNAME" ]; then
    mkdir "$PMSCO_JOBNAME" || exit 1
fi
cd "$PMSCO_JOBNAME" || exit 1
WORKDIR="$(pwd)"
PMSCO_WORK_DIR="$WORKDIR"

# copy code
# BRANCH_ARG is deliberately expanded unquoted: it is either empty or two words.
PMSCO_SOURCE_REPO="file://$PMSCO_SOURCE_DIR"
echo "$PMSCO_SOURCE_REPO"
cd "$PMSCO_WORK_DIR" || exit 1
git clone $BRANCH_ARG --single-branch --depth 1 "$PMSCO_SOURCE_REPO" pmsco || exit
cd pmsco || exit 1
PMSCO_REV=$(git log --pretty=format:"%h, %ai" -1) || exit
cd "$WORKDIR" || exit 1
echo "$PMSCO_REV" > revision.txt

# generate job script from template
# the values are escaped so that paths or arguments that contain ':', '&' or '\'
# are inserted literally.
sed -e "s:_PMSCO_WORK_DIR:$(sed_escape "$PMSCO_WORK_DIR"):g" \
    -e "s:_PMSCO_JOBNAME:$(sed_escape "$PMSCO_JOBNAME"):g" \
    -e "s:_PMSCO_NODES:$(sed_escape "$PMSCO_NODES"):g" \
    -e "s:_PMSCO_WALLTIME_HR:$(sed_escape "$PMSCO_WALLTIME_HR"):g" \
    -e "s:_PMSCO_PROJECT_FILE:$(sed_escape "$PMSCO_PROJECT_FILE"):g" \
    -e "s:_PMSCO_PROJECT_ARGS:$(sed_escape "$PMSCO_PROJECT_ARGS"):g" \
    "$SCRIPTDIR/pmsco.ra-git.template" > "$PMSCO_JOBNAME.job" || exit
chmod u+x "$PMSCO_JOBNAME.job" || exit

# request nodes and tasks
#
# The option --ntasks-per-node is meant to be used with the --nodes option.
# (For the --ntasks option, the default is one task per node, use the --cpus-per-task option to change this default.)
#
# sbatch options
# --cores-per-socket=16
# 32 cores per node
# --partition=[shared|day|week]
# --time=8-00:00:00
# override default time limit (2 days in long queue)
# time formats: "minutes", "minutes:seconds", "hours:minutes:seconds", "days-hours", "days-hours:minutes", "days-hours:minutes:seconds"
# --mail-type=ALL
# --test-only
# check script but do not submit
#
SLURM_ARGS="--nodes=$PMSCO_NODES --ntasks-per-node=$PMSCO_TASKS_PER_NODE"
if [ "$PMSCO_TASKS_PER_NODE" -gt 24 ]; then
    SLURM_ARGS="--cores-per-socket=16 $SLURM_ARGS"
fi
SLURM_ARGS="--partition=$PMSCO_PARTITION $SLURM_ARGS"
SLURM_ARGS="--time=$PMSCO_WALLTIME_HR:00:00 $SLURM_ARGS"

# the command is deliberately expanded unquoted so that it is split into words.
CMD="sbatch $SLURM_ARGS $PMSCO_JOBNAME.job"
echo $CMD
if [ "$NOSUB" != "true" ]; then
    # report a failed submission to the caller
    $CMD || exit
fi

exit 0

View File

@ -1,6 +1,12 @@
#!/bin/sh
#
# submission script for PMSCO calculations on the Ra cluster
#
# CAUTION: the job will execute the pmsco code which is present in the directory tree
# of this script _at the time of job execution_, not submission!
# before changing the code, make sure that all pending jobs have started execution,
# otherwise you will experience version conflicts.
# it's better to use the qpmsco.ra-git.sh script which clones the code.
if [ $# -lt 1 ]; then
echo "Usage: $0 [NOSUB] DESTDIR JOBNAME NODES TASKS_PER_NODE WALLTIME:HOURS PROJECT MODE [ARGS [ARGS [...]]]"
@ -87,9 +93,9 @@ PMSCO_WORK_DIR="$WORKDIR"
# provide revision information, requires git repository
cd "$SOURCEDIR"
PMSCO_REV=$(git log --pretty=format:"Data revision %h, %ai" -1)
PMSCO_REV=$(git log --pretty=format:"%h, %ai" -1)
if [ $? -ne 0 ]; then
PMSCO_REV="Data revision unknown, "$(date +"%F %T %z")
PMSCO_REV="revision unknown, "$(date +"%F %T %z")
fi
cd "$WORKDIR"
echo "$PMSCO_REV" > revision.txt

View File

@ -86,9 +86,9 @@ PHD_WORK_DIR="$WORKDIR"
# provide revision information, requires git repository
cd "$SOURCEDIR"
PHD_REV=$(git log --pretty=format:"Data revision %h, %ad" --date=iso -1)
PHD_REV=$(git log --pretty=format:"%h, %ad" --date=iso -1)
if [ $? -ne 0 ]; then
PHD_REV="Data revision unknown, "$(date +"%F %T %z")
PHD_REV="revision unknown, "$(date +"%F %T %z")
fi
cd "$WORKDIR"
echo "$PHD_REV" > revision.txt

View File

@ -763,6 +763,7 @@ src/introduction.dox \
src/concepts.dox \
src/concepts-tasks.dox \
src/concepts-emitter.dox \
src/concepts-atomscat.dox \
src/installation.dox \
src/execution.dox \
src/commandline.dox \

View File

@ -21,9 +21,6 @@ Do not include the extension <code>.py</code> or a trailing slash.
@c path/to/project.py should be the path and name to your project module.
Common args and project args are described below.
Note: In contrast to earlier versions, the project module is not executed directly any more.
Rather, it is loaded by the main pmsco module as a 'plug-in'.
\subsection sec_common_args Common Arguments
@ -43,15 +40,14 @@ The following table is ordered by importance.
| --log-level | DEBUG, INFO, WARNING (default), ERROR, CRITICAL | Minimum level of messages that should be added to the log. |
| --log-file | file system path | Name of the main log file. Under MPI, the rank of the process is inserted before the extension. Default: output-file + log, or pmsco.log. |
| --log-disable | | Disable logging. By default, logging is on. |
| --pop-size | integer | Population size (number of particles) in swarm optimization mode. The default value is the greater of 4 or two times the number of calculation processes. |
| --pop-size | integer | Population size (number of particles) in swarm and genetic optimization mode. The default value is the greater of 4 or the number of parallel calculation processes. |
| --seed-file | file system path | Name of the population seed file. Population data of previous optimizations can be used to seed a new optimization. The file must have the same structure as the .pop or .dat files. See @ref pmsco.project.Project.seed_file. |
| --table-file | file system path | Name of the model table file in table scan mode. |
| -c, --code | edac (default) | Scattering code. At the moment, only edac is supported. |
\subsubsection sec_file_categories File Categories
The following category names can be used with the @c --keep-files option.
The following category names can be used with the `--keep-files` option.
Multiple names can be specified and must be separated by spaces.
| Category | Description | Default Action |
@ -59,7 +55,7 @@ Multiple names can be specified and must be separated by spaces.
| all | shortcut to include all categories | |
| input | raw input files for calculator, including cluster and phase files in custom format | delete |
| output | raw output files from calculator | delete |
| phase | phase files in portable format for report | delete |
| atomic | atomic scattering and emission files in portable format | delete |
| cluster | cluster files in portable XYZ format for report | keep |
| debug | debug files | delete |
| model | output files in ETPAI format: complete simulation (a_-1_-1_-1_-1) | keep |
@ -67,9 +63,20 @@ Multiple names can be specified and must be separated by spaces.
| symmetry | output files in ETPAI format: symmetry (a_b_c_-1_-1) | delete |
| emitter | output files in ETPAI format: emitter (a_b_c_d_-1) | delete |
| region | output files in ETPAI format: region (a_b_c_d_e) | delete |
| report| final report of results | keep |
| report| final report of results | keep always |
| population | final state of particle population | keep |
| rfac | files related to models which give bad r-factors | delete |
| rfac | files related to models which give bad r-factors, see warning below | delete |
\note
The `report` category is always kept and cannot be turned off.
The `model` category is always kept in single calculation mode.
\warning
If you want to specify `rfac` with the `--keep-files` option,
you have to add the file categories that you want to keep, e.g.,
`--keep-files rfac cluster model scan population`
(to return the default categories for all calculated models).
Do not specify `rfac` alone as this will effectively not return any file.
\subsection sec_project_args Project Arguments
@ -125,4 +132,4 @@ The job script is written to @c $DESTDIR/$JOBNAME which is also the destination
| MODE | single, swarm, grid, genetic | PMSCO operation mode. This value is passed on to PMSCO as the @c --mode argument. |
| ARGS (optional) | | Any further arguments are passed on verbatim to PMSCO. You don't need to specify the mode and time limit here. |
*/
*/

View File

@ -0,0 +1,114 @@
/*! @page pag_concepts_atomscat Atomic scattering
\section sec_atomscat Atomic scattering
\subsection sec_atomscat_intro Introduction
The process of calculating atomic scattering factors (phase shifts) can be customized in several ways.
1. Internal processing.
Some multiple scattering programs, like EDAC, contain a built-in facility to calculate phase shifts.
This is the simplest approach and the default behaviour.
2. Automatic calculation in a separate program.
PMSCO has an interface to run the PHAGEN program from
the [MsSpec-1.0 package](https://ipr.univ-rennes1.fr/msspec) to calculate scattering factors.
Note that the PHAGEN code is not included in the public distribution of PMSCO.
3. Manual calculation.
Scattering files created manually using an external program can be used by providing the file names.
The files must have the format required by the multiple scattering code,
and they must be linked to the corresponding atoms of the cluster.
In the case of automatic calculation, the project code can optionally hook into the process
and modify clusters before and after scattering factors are calculated.
For instance, it may provide an extended cluster in order to reduce boundary effects,
or it may modify the assignment of scattering files to cluster atoms
so that the scattering factors of selected atom classes are used
(cf. section \ref sec_atomscat_atomclass).
\subsection sec_atomscat_usage Usage
\subsubsection sec_atomscat_internal Internal processing
This is the default behaviour selected in the inherited pmsco.project.Project class.
Make sure not to override the `atomic_scattering_factory` attribute.
Its default value is pmsco.calculators.calculator.InternalAtomicCalculator.
\subsubsection sec_atomscat_external Automatic calculation in a separate program
To select the atomic scattering calculator,
assign its interface class to the project's `atomic_scattering_factory` attribute.
For example, to use PHAGEN, add the following code to your project's `__init__` constructor:
@code{.py}
from pmsco.calculators.phagen import PhagenCalculator
self.atomic_scattering_factory = PhagenCalculator
@endcode
\subsubsection sec_atomscat_manual Manual calculation
If you want to keep the scattering factors constant during an optimization,
you should run PMSCO in _single_ mode and provide the model parameters and cluster
that will return the desired scattering files.
In the `create_params` method of your project,
you should then set the `phase_files` attribute,
which is a dictionary that maps atom classes to the names of the scattering files.
Unless you set specific values in the cluster object, the atom class defaults to the element number.
The file names should include a path relative to the working directory.
\subsection sec_atomscat_implement Implementation
\subsubsection sec_atomscat_atomclass Atom classes
Atomic scattering programs classify atoms based on chemical element, charge state and symmetry of the local environment.
This means that two atoms of the same chemical element may have different scattering factors.
For example, if you have EDAC output the cluster after calculation of the muffin tin potential,
you will find that the chemical element number has been replaced by an arbitrary integer.
By default, PMSCO will do the linking of atom classes and scattering files transparently.
However, if you want to reduce the number of atom classes,
or if you have the scattering factors calculated on a reference cluster,
you will have to provide project code to do the assignment.
This is described further below.
\subsubsection sec_atomscat_calculator Atomic scattering calculator
The project selects the atomic scattering calculation mode by specifying its `atomic_scattering_factory` attribute.
This is the name of a class that inherits from @ref pmsco.calculators.calculator.AtomicCalculator.
The following calculators are currently implemented:
| Class | Description |
| --- | --- |
| pmsco.calculators.calculator.InternalAtomicCalculator | Calculate the atomic scattering factors in the multiple-scattering program. |
| pmsco.calculators.phagen.PhagenCalculator | Calculate the atomic scattering factors in the PHAGEN program. |
An atomic calculator class essentially defines a `run` method that operates on a cluster and scattering parameters object.
It generates the necessary scattering files, updates the cluster with the new atom classes
and updates the parameters with the file names of the scattering files.
Note that the scattering files have to be in the correct format for the multiple scattering calculator.
\subsubsection sec_atomscat_hooks Project hooks
Before and after calculation of the scattering factors,
the project's `before_atomic_scattering` and `after_atomic_scattering` methods are called
with the cluster and input parameters.
The _before_ method provides the cluster to be used for atomic scattering calculations.
It may,
1. just return the original cluster,
2. modify the provided cluster to include additional atoms or modify the charge state of the emitter,
3. create a completely different cluster,
4. return None to suppress the atomic scattering calculation.
The method is called once at the beginning of the PMSCO job with model -1,
where it may return the global reference cluster.
Later on it is called once for each calculation task with the specific task index.
Similarly, the _after_ method collects the results and updates the `phase_files` dictionary of the input parameters.
It is free to consolidate atom classes and remove unwanted atoms.
However, it must make sure that for each atom class in the cluster,
there is a corresponding link to a scattering file.
*/

View File

@ -39,8 +39,8 @@ The code depends on the following libraries:
- BLAS
- LAPACK
- Python 2.7 or 3.6
- Numpy >= 1.11
- Python packages from PyPI listed in the requirements.txt file
- Numpy >= 1.13
- Python packages listed in the requirements.txt file
Most of these requirements are available from the Linux distribution.
For an easily maintainable Python environment, [Miniconda](https://conda.io/miniconda.html) is recommended.
@ -50,11 +50,11 @@ and it's difficult to switch between different Python versions.
On the PSI cluster machines, the environment must be set using the module system and conda (on Ra).
Details are explained in the PEARL Wiki.
PMSCO runs under Python 2.7 or Python 3.6 or higher.
Since Python 2 is being deprecated, the code has been ported to Python 3.6.
Compatibility with Python 2.7 is maintained by using
the [future package](http://python-future.org/compatible_idioms.html).
New code should be written according to their guidelines.
PMSCO runs under Python 2.7 or Python 3.6.
Since Python 2 is being deprecated, Python 3.6 is recommended.
Compatibility with Python 2.7 is currently maintained by using
the [future package](http://python-future.org/compatible_idioms.html)
but may be dropped at any time.
\subsection sec_install_instructions Instructions
@ -86,7 +86,6 @@ nano \
openmpi-bin \
openmpi-common \
sqlite3 \
swig \
wget
@endcode
@ -102,11 +101,11 @@ Install Miniconda according to their [instructions](https://conda.io/docs/user-g
then configure the Python environment:
@code{.sh}
conda create -q --yes -n pmsco python=2.7
conda create -q --yes -n pmsco python=3.6
source activate pmsco
conda install -q --yes -n pmsco \
pip \
numpy \
"numpy>=1.13" \
scipy \
ipython \
mpi4py \
@ -114,7 +113,9 @@ conda install -q --yes -n pmsco \
nose \
mock \
future \
statsmodels
statsmodels \
swig \
gitpython
pip install periodictable attrdict fasteners
@endcode

View File

@ -9,13 +9,15 @@ The actual scattering calculation is done by code developed by other parties.
While the scattering program typically calculates a diffraction pattern based on a set of static parameters and a specific coordinate file in a single process,
PMSCO wraps around that program to facilitate parameter handling, cluster building, structural optimization and parallel processing.
In the current version, the [EDAC](http://garciadeabajos-group.icfo.es/widgets/edac/) code
developed by F. J. García de Abajo, M. A. Van Hove, and C. S. Fadley (1999) is used for scattering calculations.
Other code can be integrated as well.
Initially, support for the MSC program by Kaduwela, Friedman, and Fadley was planned but is currently not maintained.
PMSCO is written in Python 2.7.
EDAC is written in C++, MSC in Fortran.
PMSCO interacts with the calculation programs through Python wrappers for C++ or Fortran.
In the current version, PMSCO can make use of the following programs.
Other programs may be integrated as well.
- [EDAC](http://garciadeabajos-group.icfo.es/widgets/edac/)
by F. J. García de Abajo, M. A. Van Hove, and C. S. Fadley,
[Phys. Rev. B 63 (2001) 075404](http://dx.doi.org/10.1103/PhysRevB.63.075404)
- PHAGEN from the [MsSpec package](https://ipr.univ-rennes1.fr/msspec)
by C. R. Natoli and D. Sébilleau,
[Comp. Phys. Comm. 182 (2011) 2567](http://dx.doi.org/10.1016/j.cpc.2011.07.012)
\section sec_intro_highlights Highlights
@ -63,11 +65,11 @@ An open distribution of PMSCO is available under the [Apache License, Version 2.
- Please acknowledge the use of the code.
- Please share your development of the code with the original author.
Due to different copyright, the MSC and EDAC programs are not contained in the public software repository.
Due to different copyright terms, the third-party calculation programs are not contained in the public software repository.
These programs may not be used without an explicit agreement by the respective original authors.
\author Matthias Muntwiler, <mailto:matthias.muntwiler@psi.ch>
\version This documentation is compiled from version $(REVISION).
\copyright 2015-2018 by [Paul Scherrer Institut](http://www.psi.ch)
\copyright 2015-2019 by [Paul Scherrer Institut](http://www.psi.ch)
\copyright Licensed under the [Apache License, Version 2.0](http://www.apache.org/licenses/LICENSE-2.0)
*/

View File

@ -30,6 +30,55 @@ The domain parameters have the following meanings:
| step | Not used. |
\subsubsection sec_opt_seed Seeding a population
By default, one particle is initialized with the start value declared in the parameter domain,
and the others are set to random values within the domain.
You may initialize more particles of the population with specific values by providing a seed file.
The seed file must have a similar format as the result `.dat` files
with a header line specifying the column names and data rows containing the values for each particle.
A good practice is to use a previous `.dat` file and remove unwanted rows.
To continue an interrupted optimization,
the `.dat` file from the previous optimization can be used as is.
The seeding procedure can be tweaked by several optimizer parameters (see above).
PMSCO normally loads the first rows up to population size - 1 or up to the `seed_limit` parameter,
whichever is lower.
If an `_rfac` column is present, the file is first sorted by R-factor and only the best models are loaded.
Models that resulted in an R-factor above the `rfac_limit` parameter are always ignored.
During the optimization process, all models loaded from the seed file are normally re-calculated.
This may waste CPU time if the calculation is run under the same conditions
and would result in exactly the same R-factor,
as is the case if the seed is used to continue a previous optimization, for example.
In these situations, the `recalc_seed` parameter can be set to False,
and PMSCO will use the R-factor value from the seed file rather than calculating the model again.
\subsubsection sec_opt_patch Patching a running optimization
While an optimization process is running, the user can manually patch the population with arbitrary values,
for instance, to kick the population out of a local optimum or to drive it to a less sampled parameter region.
To patch a running population, prepare a population file named `pmsco_patch.pop` and copy it to the work directory.
The file must have a similar format as the result `.dat` files
with a header line specifying the column names and data rows containing the values.
It should contain as many rows as particles to be patched but not more than the size of the population.
The columns must include a `_particle` column which specifies the particle to patch
as well as the model parameters to be changed.
Parameters that should remain unaffected can be left out,
extra columns including `_gen`, `_rfac` etc. are ignored.
PMSCO checks the file for syntax errors and ignores it if errors are present.
Parameter values that lie outside the domain boundary are ignored.
Successful or failed patching is logged at warning level.
The patch file is re-applied whenever its time stamp has changed.
\attention Do not edit the patch file in the working directory
to prevent it from being read in an unfinished state or multiple times.
\subsection sec_opt_genetic Genetic optimization
The genetic algorithm evolves a population of individuals
@ -73,11 +122,14 @@ The domain parameters have the following meanings:
| max | Upper limit of the parameter range. |
| step | Standard deviation of the Gaussian distribution of weak mutations. The step should not be much lower than the parameter range divided by the population size and not greater than one third of the parameter range. |
The population of the genetic optimizer can be seeded and patched in the same way as the particle swarm,
cf. sections @ref sec_opt_seed and @ref sec_opt_patch.
\subsection sec_opt_grid Grid search
The grid search algorithm samples the parameter space at equidistant steps.
The order of calculations is randomized so that early results represent various parts of the parameter space.
The order of calculations is randomized so that distant parts of the parameter space are sampled at an early stage.
| Parameter | Description |
| --- | --- |

View File

@ -25,7 +25,7 @@ class Scan {
thetas
phis
alphas
set_scan()
import_scan_file()
}
class Domain {

View File

@ -64,7 +64,6 @@ try agent forwarding (-A option to ssh).
openmpi-bin \
openmpi-common \
sqlite3 \
swig \
wget
apt-get clean
@ -76,7 +75,7 @@ try agent forwarding (-A option to ssh).
. /usr/local/miniconda3/bin/activate pmsco
conda install -q --yes -n pmsco \
pip \
numpy \
"numpy>=1.13" \
scipy \
ipython \
mpi4py \
@ -84,7 +83,8 @@ try agent forwarding (-A option to ssh).
nose \
mock \
future \
statsmodels
statsmodels \
swig
conda clean --all -y
/usr/local/miniconda3/envs/pmsco/bin/pip install periodictable attrdict fasteners

View File

@ -63,7 +63,6 @@ try agent forwarding (-A option to ssh).
openmpi-bin \
openmpi-common \
sqlite3 \
swig \
wget
apt-get clean
@ -75,7 +74,7 @@ try agent forwarding (-A option to ssh).
. /usr/local/miniconda3/bin/activate pmsco
conda install -q --yes -n pmsco \
pip \
numpy \
"numpy>=1.13" \
scipy \
ipython \
mpi4py \
@ -83,7 +82,8 @@ try agent forwarding (-A option to ssh).
nose \
mock \
future \
statsmodels
statsmodels \
swig
conda clean --all -y
/usr/local/miniconda3/envs/pmsco/bin/pip install periodictable attrdict fasteners

View File

@ -35,7 +35,7 @@ SHELL=/bin/sh
# make all
#
.PHONY: all bin docs clean edac loess msc mufpot
.PHONY: all bin docs clean edac loess msc mufpot phagen
PMSCO_DIR = pmsco
DOCS_DIR = docs
@ -44,7 +44,7 @@ all: edac loess docs
bin: edac loess
edac loess msc mufpot:
edac loess msc mufpot phagen:
$(MAKE) -C $(PMSCO_DIR)
docs:

View File

@ -11,7 +11,7 @@ TestCalcInterface is provided for testing the PMSCO code quickly without calling
@author Matthias Muntwiler
@copyright (c) 2015-18 by Paul Scherrer Institut @n
@copyright (c) 2015-19 by Paul Scherrer Institut @n
Licensed under the Apache License, Version 2.0 (the "License"); @n
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
@ -56,11 +56,25 @@ class Calculator(object):
@arg the first element is the name of the main ETPI or ETPAI result file to be further processed.
@arg the second element is a dictionary that lists the names of all created data files with their category.
the dictionary key is the file name,
the value is the file category (cluster, phase, etc.).
the value is the file category (cluster, atomic, etc.).
"""
return None, None
class AtomicCalculator(Calculator):
    """
    abstract base of the interface classes to atomic scattering calculation programs.

    the class does not add or override any member of Calculator.
    it serves as the common ancestor of the atomic scattering calculators.
    """
class InternalAtomicCalculator(AtomicCalculator):
    """
    placeholder atomic scattering class
    for the case that the multiple scattering calculator computes the scattering factors internally.

    the class does not add or override any member of AtomicCalculator.
    """
class TestCalculator(Calculator):
"""
interface class producing random data for testing the MSCO code without calling an external program.

View File

@ -55,7 +55,10 @@ class EdacCalculator(calculator.Calculator):
@param filepath: (str) name and path of the file to be created.
@return dictionary of created files {filename: category}
"""
files = {}
with open(filepath, "w") as f:
f.write("verbose off\n")
f.write("cluster input {0}\n".format(params.cluster_file))
@ -146,9 +149,16 @@ class EdacCalculator(calculator.Calculator):
scatterers = ["scatterer {at} {fi}\n".format(at=at, fi=fi)
for (at, fi) in params.phase_files.items()
if os.path.isfile(fi)]
if scatterers:
rme = ["rmat {fi}\n".format(fi=fi)
for (at, fi) in params.rme_files.items()
if at == params.emitters[0][3] and os.path.isfile(fi)] or \
["rmat inline 1 regular1 {l0} {pv} {pd} {mv} {md}\n".format(l0=params.l_init,
pv=params.rme_plus_value, pd=params.rme_plus_shift,
mv=params.rme_minus_value, md=params.rme_minus_shift)]
if scatterers and rme:
for scat in scatterers:
f.write(scat)
f.write(rme[0])
else:
f.write("muffin-tin\n")
@ -162,16 +172,27 @@ class EdacCalculator(calculator.Calculator):
f.write("orders {0:d} ".format(len(params.orders)))
f.write(" ".join(format(order, "d") for order in params.orders) + "\n")
f.write("emission angle window {0:F}\n".format(params.angular_resolution / 2.0))
# f.write("cluster output l(A) out.clu")
# problems:
# - muffin-tin relabels atoms
# - there can be multiple atom types for the same chemical element
# - we have to compare coordinates to find the mapping between input and output cluster
# f.write("scan scatterer i phase-shifts i.pha")
# f.write("scan scatterer i potential i.pot")
# scattering factor output (see project.Params.phase_output_classes)
if params.phase_output_classes is not None:
fn = "{0}.clu".format(params.output_file)
f.write("cluster output l(A) {fn}\n".format(fn=fn))
files[fn] = "output"
try:
cls = (cl for cl in params.phase_output_classes)
except TypeError:
cls = range(params.phase_output_classes)
for cl in cls:
fn = "{of}.{cl}.scat".format(cl=cl, of=params.output_file)
f.write("scan scatterer {cl} phase-shifts {fn}\n".format(cl=cl, fn=fn))
files[fn] = "output"
f.write("scan pd {0}\n".format(params.output_file))
files[params.output_file] = "output"
f.write("end\n")
return files
def run(self, params, cluster, scan, output_file):
"""
run EDAC with the given parameters and cluster.
@ -205,13 +226,13 @@ class EdacCalculator(calculator.Calculator):
params.cluster_file = clu_filename
params.output_file = out_filename
params.data_file = dat_filename
params.emitters = cluster.get_emitters()
params.emitters = cluster.get_emitters(['x', 'y', 'z', 'c'])
# save parameter files
logger.debug("writing cluster file %s", clu_filename)
cluster.save_to_file(clu_filename, fmt=mc.FMT_EDAC)
logger.debug("writing input file %s", par_filename)
self.write_input_file(params, scan, par_filename)
files = self.write_input_file(params, scan, par_filename)
# run EDAC
logger.info("calling EDAC with input file %s", par_filename)
@ -244,6 +265,9 @@ class EdacCalculator(calculator.Calculator):
logger.debug("save result to file %s", etpi_filename)
md.save_data(etpi_filename, result_etpi)
files = {clu_filename: 'input', par_filename: 'input', dat_filename: 'output',
etpi_filename: 'region'}
files[clu_filename] = 'input'
files[par_filename] = 'input'
files[dat_filename] = 'output'
files[etpi_filename] = 'region'
return etpi_filename, files

View File

View File

@ -0,0 +1,43 @@
SHELL=/bin/sh
# makefile for PHAGEN program and module
#
# the PHAGEN source code is not included in the public distribution.
# please obtain the PHAGEN code from the original author,
# and copy it to this directory before compilation.
#
# see the top-level makefile for additional information.
#
# targets:
#   all, phagen  build the stand-alone executable and the python extension module.
#   clean        remove shared objects, object files and executables.
# reset the suffix list so that only the suffixes listed here are known to make.
.SUFFIXES:
.SUFFIXES: .c .cpp .cxx .exe .f .h .i .o .py .pyf .so
.PHONY: all clean phagen
# tool names and options. each can be overridden from the environment or the command line.
FC?=gfortran
F2PY?=f2py
F2PYOPTS?=
CC?=gcc
CCOPTS?=
SWIG?=swig
SWIGOPTS?=
PYTHON?=python
PYTHONOPTS?=
PYTHONINC?=
PYTHON_CONFIG = ${PYTHON}-config
PYTHON_CFLAGS ?= $(shell ${PYTHON_CONFIG} --cflags)
PYTHON_EXT_SUFFIX ?= $(shell ${PYTHON_CONFIG} --extension-suffix)
all: phagen
# NOTE(review): the rules below refer to EXT_SUFFIX whereas this file defines PYTHON_EXT_SUFFIX.
# presumably EXT_SUFFIX is provided by the top-level makefile - confirm.
# if it is empty, the module target collapses into the phony target phagen.
phagen: phagen.exe phagen$(EXT_SUFFIX)
# stand-alone executable.
# NOTE(review): FCOPTS has no default in this file - presumably set by the caller.
phagen.exe: phagen_scf.f msxas3.inc msxasc3.inc
$(FC) $(FCOPTS) -o phagen.exe phagen_scf.f
# f2py signature file. only the libmain entry point is exposed to python.
# phagen_scf.f is an order-only prerequisite: the signature file is not regenerated when the source changes.
phagen.pyf: | phagen_scf.f
$(F2PY) -h phagen.pyf -m phagen phagen_scf.f only: libmain
# python extension module built by f2py from the signature file and the fortran source.
phagen$(EXT_SUFFIX): phagen_scf.f phagen.pyf msxas3.inc msxasc3.inc
$(F2PY) -c $(F2PYOPTS) -m phagen phagen.pyf phagen_scf.f
clean:
rm -f *.so *.o *.exe

View File

@ -0,0 +1,102 @@
--- phagen_scf.orig.f 2019-06-05 16:45:52.977855859 +0200
+++ phagen_scf.f 2019-05-09 16:32:35.790286429 +0200
@@ -174,6 +174,99 @@
1100 format(//,1x,' ** phagen terminated normally ** ',//)
end
+
+c-----------------------------------------------------------------------
+ subroutine libmain(infile,outfile,etcfile)
+c main calculation routine
+c entry point for external callers
+c
+c infile: name of parameter input file
+c
+c outfile: base name of output files
+c output files with endings .list, .clu, .pha, .tl, .rad
+c will be created
+c-----------------------------------------------------------------------
+ implicit real*8 (a-h,o-z)
+c
+ include 'msxas3.inc'
+ include 'msxasc3.inc'
+
+ character*60 infile,outfile,etcfile
+ character*70 listfile,clufile,tlfile,radfile,phafile
+
+c
+c.. constants
+ antoau = 0.52917715d0
+ pi = 3.141592653589793d0
+ ev = 13.6058d0
+ zero = 0.d0
+c.. threshold for linearity
+ thresh = 1.d-4
+c.. fortran io units
+ idat = 5
+ iwr = 6
+ iphas = 30
+ iedl0 = 31
+ iwf = 32
+ iof = 17
+
+ iii=LnBlnk(outfile)+1
+ listfile=outfile
+ listfile(iii:)='.list'
+ clufile=outfile
+ clufile(iii:)='.clu'
+ phafile=outfile
+ phafile(iii:)='.pha'
+ tlfile=outfile
+ tlfile(iii:)='.tl'
+ radfile=outfile
+ radfile(iii:)='.rad'
+
+ open(idat,file=infile,form='formatted',status='old')
+ open(iwr,file=listfile,form='formatted',status='unknown')
+ open(10,file=clufile,form='formatted',status='unknown')
+ open(35,file=tlfile,form='formatted',status='unknown')
+ open(55,file=radfile,form='formatted',status='unknown')
+ open(iphas,file=phafile,form='formatted',status='unknown')
+
+ open(iedl0,form='unformatted',status='scratch')
+ open(iof,form='unformatted',status='scratch')
+ open(unit=21,form='unformatted',status='scratch')
+ open(60,form='formatted',status='scratch')
+ open(50,form='formatted',status='scratch')
+ open(unit=13,form='formatted',status='scratch')
+ open(unit=14,form='formatted',status='scratch')
+ open(unit=11,status='scratch')
+ open(unit=iwf,status='scratch')
+ open(unit=33,status='scratch')
+ open(unit=66,status='scratch')
+
+ call inctrl
+ call intit(iof)
+ call incoor
+ call calphas
+
+ close(idat)
+ close(iwr)
+ close(10)
+ close(35)
+ close(55)
+ close(iphas)
+ close(iedl0)
+ close(iof)
+ close(60)
+ close(50)
+ close(13)
+ close(14)
+ close(11)
+ close(iwf)
+ close(33)
+ close(66)
+ close(21)
+
+ endsubroutine
+
+
subroutine inctrl
implicit real*8 (a-h,o-z)
include 'msxas3.inc'

View File

@ -0,0 +1,153 @@
"""
@package pmsco.calculators.phagen.runner
Natoli/Sebilleau PHAGEN interface
this module runs the PHAGEN program to calculate scattering factors and radial matrix element.
@author Matthias Muntwiler
@copyright (c) 2015-19 by Paul Scherrer Institut @n
Licensed under the Apache License, Version 2.0 (the "License"); @n
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import logging
import os
import shutil
import tempfile
from pmsco.calculators.calculator import AtomicCalculator
from pmsco.calculators.phagen.phagen import libmain
from pmsco.calculators.phagen.translator import Translator
import pmsco.cluster
logger = logging.getLogger(__name__)
class PhagenCalculator(AtomicCalculator):
    """
    use the PHAGEN program to calculate scattering factors and radial matrix element.

    this produces scatterer, radial matrix element and cluster files for EDAC.
    """

    def run(self, params, cluster, scan, output_file):
        """
        create the input file, run PHAGEN, and translate the output to EDAC format.

        the following files are created in the job work directory:
        - copy of the PHAGEN input file.
          its name is `output_file + ".phagen.in"`.
        - copies of the PHAGEN phase and radial matrix element files.
          their names are `output_file + ".phagen.pha"` and `output_file + ".phagen.rad"`.
        - scattering factor files in EDAC format.
          their names are `output_file + "_{atomclass}.scat"`.
        - radial matrix element file in EDAC format.
          its name is `output_file + ".rme"`.
        - cluster file in PMSCO format.
          its name is `output_file + ".pmsco.clu"`.

        the cluster and params objects are updated and linked to the scattering files
        so that they can be passed to EDAC without further modification.
        the radial matrix element is currently not used.

        note that the scattering files are numbered according to the atomic environment and not chemical element.
        this means that the updated cluster (cluster object or ".pmsco.clu" file)
        must be used in the scattering calculation.
        atomic index is not preserved - atoms in the input and output clusters can only be related by coordinate!

        because PHAGEN generates a lot of files with hard-coded names,
        the function creates a temporary directory for PHAGEN and deletes it before returning.
        the process working directory is changed to the temporary directory while PHAGEN runs
        and restored before the temporary directory is removed.

        errors while writing the input file or reading the PHAGEN output files are logged
        and do not abort the function.

        @param params: pmsco.project.Params object.
            the phase_files attribute is updated with the paths of the scattering files.

        @param cluster: pmsco.cluster.Cluster object.
            the cluster is updated with the one returned from PHAGEN.
            the atom classes are linked to the scattering files.

        @param scan: pmsco.project.Scan object.
            the scan object is used to determine the kinetic energy range.

        @param output_file: base path and name of the output files.

        @return (None, dict) where dict maps the names of the created files to their category.
            the category is "input" for the copy of the PHAGEN input file,
            "output" for the copies of the PHAGEN phase and radial files,
            "atomic" for the EDAC scattering and radial matrix element files,
            and "cluster" for the cluster file.
        """
        transl = Translator()
        transl.params.set_params(params)
        transl.params.set_cluster(cluster)
        transl.params.set_scan(scan)
        phagen_cluster = pmsco.cluster.Cluster()

        files = {}
        prev_wd = os.getcwd()
        with tempfile.TemporaryDirectory() as temp_dir:
            os.chdir(temp_dir)
            try:
                # sub-directories are created up front because PHAGEN uses hard-coded file names
                os.mkdir("div")
                os.mkdir("div/wf")
                os.mkdir("plot")
                os.mkdir("data")

                # prepare input for phagen
                infile = "phagen.in"
                outfile = "phagen.out"

                try:
                    transl.write_input(infile)
                    report_infile = os.path.join(prev_wd, output_file + ".phagen.in")
                    shutil.copy(infile, report_infile)
                    files[report_infile] = "input"
                except IOError:
                    logger.warning("error writing phagen input file %s.", infile)

                # call phagen
                # NOTE(review): the libmain subroutine in the fortran patch declares three arguments
                # (infile, outfile, etcfile) - confirm that the f2py wrapper accepts this two-argument call.
                libmain(infile, outfile)

                # collect results
                phafile = outfile + ".pha"
                try:
                    transl.parse_phagen_phase(phafile)
                    report_phafile = os.path.join(prev_wd, output_file + ".phagen.pha")
                    shutil.copy(phafile, report_phafile)
                    files[report_phafile] = "output"
                except IOError:
                    logger.error("error loading phagen phase file %s", phafile)

                radfile = outfile + ".rad"
                try:
                    transl.parse_radial_file(radfile)
                    report_radfile = os.path.join(prev_wd, output_file + ".phagen.rad")
                    shutil.copy(radfile, report_radfile)
                    files[report_radfile] = "output"
                except IOError:
                    logger.error("error loading phagen radial file %s", radfile)

                clufile = outfile + ".clu"
                try:
                    phagen_cluster.load_from_file(clufile, pmsco.cluster.FMT_PHAGEN_OUT)
                except IOError:
                    logger.error("error loading phagen cluster file %s", clufile)
            finally:
                # leave the temporary directory before the context manager removes it.
                # removing the current working directory fails on some platforms.
                os.chdir(prev_wd)

        # write edac files
        scatfile = output_file + "_{}.scat"
        scatfiles = transl.write_edac_scattering(scatfile)
        params.phase_files = dict(scatfiles)
        files.update({fn: "atomic" for fn in scatfiles.values()})

        rmefile = output_file + ".rme"
        transl.write_edac_emission(rmefile)
        files[rmefile] = "atomic"

        # copy the scatterer classes assigned by PHAGEN into the caller's cluster
        cluster.update_atoms(phagen_cluster, {'c'})
        clufile = output_file + ".pmsco.clu"
        cluster.save_to_file(clufile, pmsco.cluster.FMT_PMSCO)
        files[clufile] = "cluster"

        return None, files

View File

@ -0,0 +1,411 @@
"""
@package pmsco.calculators.phagen.translator
Natoli/Sebilleau PHAGEN interface
this module provides conversion between input/output files of PHAGEN and EDAC.
@author Matthias Muntwiler
@copyright (c) 2015-19 by Paul Scherrer Institut @n
Licensed under the Apache License, Version 2.0 (the "License"); @n
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
from pmsco.compat import open
## rydberg energy in electron volts
ERYDBERG = 13.6056923
def state_to_edge(state):
    """
    translate spectroscopic notation to edge notation.

    @param state: spectroscopic notation: "1s", "2s", "2p1/2", etc.
        the first character is the principal quantum number.
        the remainder names the sub-shell, with or without the j-value.

    @return: edge notation: "k", "l1", "l2", etc.
        note: if the j-value is not given (or not recognized), the lower j edge is returned.
        if the sub-shell is missing altogether, the first sub-shell is assumed.

    @raise ValueError if the principal quantum number is not a digit
        or the orbital letter is not one of s, p, d, f.
    """
    jshells = ['s', 'p1/2', 'p3/2', 'd3/2', 'd5/2', 'f5/2', 'f7/2']
    shell = int(state[0])
    term = state[1:]
    if not term:
        # slicing never raises IndexError, so the missing sub-shell must be detected explicitly.
        subshell = 1
    elif term in jshells:
        subshell = jshells.index(term) + 1
    else:
        # j-value missing or unknown: fall back to the first sub-shell with the given orbital letter.
        lshells = [s[0] for s in jshells]
        subshell = lshells.index(term[0]) + 1
    edge = "klmnop"[shell - 1]
    if shell > 1:
        edge += str(subshell)
    return edge
class TranslationParams(object):
    """
    project parameters needed for translation.

    energy unit is eV.

    attributes:
    - initial_state: (str) initial state in spectroscopic notation, e.g. "1s", "2p3/2".
    - binding_energy: (float) binding energy of the initial state.
    - cluster: initial cluster, see set_cluster.
    - kinetic_energies: (numpy.ndarray, one-dimensional) kinetic energies of the scan.
    """

    def __init__(self):
        self.initial_state = "1s"
        self.binding_energy = 0.
        self.cluster = None
        # np.float was removed from numpy. the built-in float is the same type (float64).
        self.kinetic_energies = np.empty(0, dtype=float)

    @property
    def l_init(self):
        """
        angular momentum quantum number of the initial state.

        derived from the second character of initial_state (s = 0, p = 1, d = 2, f = 3).
        """
        return "spdf".index(self.initial_state[1])

    @property
    def edge(self):
        """
        initial state in edge notation, see state_to_edge.
        """
        return state_to_edge(self.initial_state)

    def set_params(self, params):
        """
        set the translation parameters.

        @param params: a pmsco.project.Params object or
            a dictionary containing some or all public fields of this class.

        @return: None
        """
        try:
            self.initial_state = params.initial_state
            self.binding_energy = params.binding_energy
        except AttributeError:
            # not a Params object: treat the argument as a mapping of attribute names to values.
            for key in params:
                setattr(self, key, params[key])

    def set_scan(self, scan):
        """
        set the scan parameters.

        @param scan: the energies of the scan.
            this can be an object that has an `energies` attribute (such as a pmsco.project.Scan object),
            a mapping or structured array that has an 'e' item,
            or a plain sequence, array or scalar of energies.

        @return: None
        """
        try:
            energies = scan.energies
        except AttributeError:
            try:
                energies = scan['e']
            except (KeyError, TypeError, IndexError):
                # lists and scalars raise TypeError, plain numpy arrays raise IndexError on a string key.
                energies = scan

        # the consumers of kinetic_energies require an array with at least one dimension.
        self.kinetic_energies = np.atleast_1d(energies)

    def set_cluster(self, cluster):
        """
        set the initial cluster.

        @param cluster: a pmsco.cluster.Cluster object

        @return: None
        """
        self.cluster = cluster
class Translator(object):
    """
    data conversion to/from phagen input/output files.

    usage:
    1. set the translation parameters self.params.
    2. call write_input to create the phagen input file.
    3. call phagen on the input file.
    4. call parse_phagen_phase.
    5. call parse_radial_file.
    6. call write_edac_scattering to produce the EDAC scattering matrix files.
    7. call write_edac_emission to produce the EDAC emission matrix file.
    """

    def __init__(self):
        """
        initialize the object instance.

        the scattering and emission arrays are created empty.
        they are filled by parse_phagen_phase and parse_radial_file.
        """
        self.params = TranslationParams()
        dt = [('e', 'f4'), ('a', 'i4'), ('l', 'i4'), ('t', 'c16')]
        self.scattering = np.empty(0, dtype=dt)
        dt = [('e', 'f4'), ('dw', 'c16'), ('up', 'c16')]
        self.emission = np.empty(0, dtype=dt)

    def write_cluster(self, f):
        """
        write the cluster section of the PHAGEN input file.

        requires a valid pmsco.cluster.Cluster in self.params.cluster.
        one line (symbol, element number, x, y, z) is written per atom,
        followed by a terminating line.

        @param f: file or output stream (an object with a write method)

        @return: None
        """
        for atom in self.params.cluster.data:
            d = {k: atom[k] for k in atom.dtype.names}
            f.write("{s} {t} {x} {y} {z}\n".format(**d))
        f.write("-1 -1 0. 0. 0.\n")

    def write_ionicity(self, f):
        """
        write the ionicity section of the PHAGEN input file.

        ionicity is read from the 'q' column of the cluster.
        all atoms of a chemical element must have the same charge state
        because ionicity has to be specified per element.
        this function writes the average of all charge states of an element.

        @param f: file or output stream (an object with a write method)

        @return: None
        """
        data = self.params.cluster.data
        for element in np.unique(data['t']):
            charge = np.mean(data['q'][data['t'] == element])
            f.write("{t} {q}\n".format(t=element, q=charge))
        f.write("-1\n")

    def write_input(self, f):
        """
        write the PHAGEN input file.

        the energy range is taken from self.params.kinetic_energies and converted to Rydberg.
        if the scan contains just one energy or the energy step is below 0.0001 Ry,
        the step is set to 0.1 Ry.

        @param f: file path or output stream (an object with a write method).

        @return: None

        @raise ValueError if the binding energy is (close to) zero.
        """
        energies = self.params.kinetic_energies
        phagen_params = {}
        phagen_params['emin'] = energies.min() / ERYDBERG
        phagen_params['emax'] = energies.max() / ERYDBERG
        n_steps = energies.shape[0] - 1
        if n_steps > 0:
            phagen_params['delta'] = (phagen_params['emax'] - phagen_params['emin']) / n_steps
        else:
            # single energy: avoid the division by zero that would result in delta=nan.
            phagen_params['delta'] = 0.
        if phagen_params['delta'] < 0.0001:
            phagen_params['delta'] = 0.1
        phagen_params['edge'] = state_to_edge(self.params.initial_state)  # possibly not used
        phagen_params['edge1'] = 'm4'  # auger not supported
        phagen_params['edge2'] = 'm4'  # auger not supported
        phagen_params['cip'] = self.params.binding_energy / ERYDBERG
        if phagen_params['cip'] < 0.001:
            raise ValueError("binding energy parameter is zero.")

        # the cluster is ionic if at least one atom carries a non-negligible charge
        if np.sum(np.abs(self.params.cluster.data['q']) >= 0.001) > 0:
            phagen_params['ionzst'] = 'ionic'
        else:
            phagen_params['ionzst'] = 'neutral'

        if hasattr(f, "write"):
            f.write("&job\n")
            f.write("calctype='xpd',\n")
            f.write("coor='angs',\n")
            f.write("cip={cip},\n".format(**phagen_params))
            f.write("edge='{edge}',\n".format(**phagen_params))
            f.write("edge1='{edge1}',\n".format(**phagen_params))
            f.write("edge2='{edge2}',\n".format(**phagen_params))
            f.write("gamma=0.03,\n")
            f.write("lmax_mode=2,\n")
            f.write("lmaxt=50,\n")
            f.write("emin={emin},\n".format(**phagen_params))
            f.write("emax={emax},\n".format(**phagen_params))
            f.write("delta={delta},\n".format(**phagen_params))
            f.write("potgen='in',\n")
            f.write("potype='hedin',\n")
            f.write("norman='stdcrm',\n")
            f.write("ovlpfac=0.0,\n")
            f.write("ionzst='{ionzst}',\n".format(**phagen_params))
            f.write("charelx='ex',\n")
            f.write("l2h=4\n")
            f.write("&end\n")
            f.write("comment 1\n")
            f.write("comment 2\n")
            f.write("\n")
            self.write_cluster(f)
            self.write_ionicity(f)
        else:
            # f is a path: open the file and write to the stream
            with open(f, "w") as fi:
                self.write_input(fi)

    def parse_phagen_phase(self, f):
        """
        parse the phase output file from PHAGEN.

        the file contains the following columns:

        @arg e energy (Ry)
        @arg x1 unknown 1
        @arg x2 unknown 2
        @arg na atom index (1-based)
        @arg nl angular momentum quantum number l
        @arg tr real part of the scattering matrix element
        @arg ti imaginary part of the scattering matrix element
        @arg ph phase shift

        the data is translated into the self.scattering array.

        @arg e energy (eV)
        @arg a atom index (1-based)
        @arg l angular momentum quantum number l
        @arg t complex scattering matrix element

        @param f: file or path (any file-like or path-like object that can be passed to numpy.genfromtxt).

        @return: None
        """
        dt = [('e', 'f4'), ('x1', 'f4'), ('x2', 'f4'), ('na', 'i4'), ('nl', 'i4'),
              ('tr', 'f8'), ('ti', 'f8'), ('ph', 'f4')]
        # genfromtxt returns a zero-dimensional array if the file contains just one row
        data = np.atleast_1d(np.genfromtxt(f, dtype=dt))

        scat = np.zeros(data.shape, dtype=self.scattering.dtype)
        scat['e'] = data['e'] * ERYDBERG
        scat['a'] = data['na']
        scat['l'] = data['nl']
        scat['t'] = data['tr'] + 1j * data['ti']
        self.scattering = scat

    def write_edac_scattering(self, filename_format, phases=False):
        """
        write scatterer files for EDAC.

        produces one file for each atom class in self.scattering.

        @param filename_format: file name including a placeholder {} for the atom class.

        @param phases: write phase files instead of t-matrix files.

        @return: dictionary that maps atom classes to file names
        """
        if phases:
            write = self.write_edac_phase_file
        else:
            write = self.write_edac_scattering_file

        scat = self.scattering
        files = {}
        for atom in np.unique(scat['a']):
            f = filename_format.format(atom)
            write(f, scat[scat['a'] == atom])
            files[atom] = f

        return files

    @staticmethod
    def _write_edac_table(f, scat, label, format_item, padding):
        """
        write one table of per-energy, per-l values in the layout shared by EDAC t-matrix and phase files.

        @param f: output stream (an object with a write method).

        @param scat: a slice of the self.scattering array belonging to the same atom class.

        @param label: last word of the header line ("tl" or "real").

        @param format_item: function that formats one complex matrix element, including the leading space.

        @param padding: text written for each angular momentum up to lmax that is missing at an energy.

        @return: None
        """
        energies = np.unique(scat['e'])
        ne = energies.shape[0]
        lmax = scat['l'].max()
        if ne == 1:
            f.write("1 {lmax} regular {label}\n".format(lmax=lmax, label=label))
        else:
            f.write("{nk} E(eV) {lmax} regular {label}\n".format(nk=ne, lmax=lmax, label=label))

        for energy in energies:
            energy_scat = scat[scat['e'] == energy]
            # the energy column is omitted if there is only one energy
            if ne > 1:
                f.write("{0:.3f} ".format(energy))
            for item in energy_scat:
                f.write(format_item(item['t']))
            # every line must contain lmax + 1 entries
            f.write(padding * max(0, int(lmax) + 1 - len(energy_scat)))
            f.write("\n")

    def write_edac_scattering_file(self, f, scat):
        """
        write a scatterer file for EDAC.

        the file contains the real and imaginary parts of the t-matrix elements.

        @param f: file path or output stream (an object with a write method).

        @param scat: a slice of the self.scattering array belonging to the same atom class.

        @return: None
        """
        if hasattr(f, "write"):
            self._write_edac_table(f, scat, "tl",
                                   lambda t: " {0:.6f} {1:.6f}".format(t.real, t.imag),
                                   " 0 0")
        else:
            with open(f, "w") as fi:
                self.write_edac_scattering_file(fi, scat)

    def write_edac_phase_file(self, f, scat):
        """
        write a phase file for EDAC.

        the file contains the complex argument of the t-matrix elements.

        @param f: file path or output stream (an object with a write method).

        @param scat: a slice of the self.scattering array belonging to the same atom class.

        @return: None
        """
        if hasattr(f, "write"):
            self._write_edac_table(f, scat, "real",
                                   lambda t: " {0:.6f}".format(np.angle(t)),
                                   " 0")
        else:
            with open(f, "w") as fi:
                self.write_edac_phase_file(fi, scat)

    def parse_radial_file(self, f):
        """
        parse the radial matrix element output file from phagen.

        the file is expected to contain four columns per row:
        real and imaginary part of the first matrix element (stored in the 'dw' field of self.emission)
        and real and imaginary part of the second matrix element (stored in the 'up' field).
        the 'e' field of self.emission is not set by this method, cf. write_edac_emission.

        @param f: file or path (any file-like or path-like object that can be passed to numpy.genfromtxt).

        @return: None
        """
        dt = [('ar', 'f8'), ('ai', 'f8'), ('br', 'f8'), ('bi', 'f8')]
        # genfromtxt returns a zero-dimensional array if the file contains just one row (single energy)
        data = np.atleast_1d(np.genfromtxt(f, dtype=dt))

        emission = np.zeros(data.shape, dtype=self.emission.dtype)
        emission['dw'] = data['ar'] + 1j * data['ai']
        emission['up'] = data['br'] + 1j * data['bi']
        self.emission = emission

    def write_edac_emission(self, f):
        """
        write the radial photoemission matrix element in EDAC format.

        requires self.emission, self.params.kinetic_energies and self.params.initial_state.
        the kinetic energies are copied into the 'e' field of self.emission.

        @param f: file path or output stream (an object with a write method).

        @return: None
        """
        if hasattr(f, "write"):
            l0 = self.params.l_init
            energies = self.params.kinetic_energies
            emission = self.emission
            emission['e'] = energies
            ne = energies.shape[0]
            if ne == 1:
                f.write("1 regular2 {l0}\n".format(l0=l0))
            else:
                f.write("{nk} E(eV) regular2 {l0}\n".format(nk=ne, l0=l0))
            for item in emission:
                # the energy column is omitted if there is only one energy
                if ne > 1:
                    f.write("{0:.3f} ".format(item['e']))
                f.write(" {0:.6f} {1:.6f}".format(item['up'].real, item['up'].imag))
                f.write(" {0:.6f} {1:.6f}".format(item['dw'].real, item['dw'].imag))
                f.write("\n")
        else:
            with open(f, "w") as of:
                self.write_edac_emission(of)

358
pmsco/cluster.py Normal file → Executable file
View File

@ -1,12 +1,15 @@
#!/usr/bin/env python
"""
@package pmsco.cluster
cluster tools for MSC and EDAC
cluster building and handling
the Cluster class is provided to facilitate the construction and import/export of clusters.
a cluster can be built by adding single atoms, layers, or a half-space bulk lattice.
the class can import from/export to EDAC, MSC, and XYZ cluster files.
the class can import from/export to various file formats.
XYZ allows for export to 3D visualizers, e.g. Avogadro.
the module has a command line interface to convert cluster files.
@pre requires the periodictable package (https://pypi.python.org/pypi/periodictable)
@code{.sh}
pip install --user periodictable
@ -14,7 +17,11 @@ pip install --user periodictable
@author Matthias Muntwiler
@copyright (c) 2015-18 by Paul Scherrer Institut
@copyright (c) 2015-19 by Paul Scherrer Institut @n
Licensed under the Apache License, Version 2.0 (the "License"); @n
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
"""
from __future__ import absolute_import
@ -34,6 +41,12 @@ FMT_MSC = 1
FMT_EDAC = 2
## XYZ file format identifier
FMT_XYZ = 3
## PHAGEN output file format identifier
FMT_PHAGEN_OUT = 4
## PHAGEN input file format identifier
FMT_PHAGEN_IN = 5
## native file format identifier
FMT_PMSCO = 6
# python version dependent type of chemical symbol
if sys.version_info[0] >= 3:
@ -43,11 +56,14 @@ else:
## numpy.array datatype of Cluster.data array
DTYPE_CLUSTER_INTERNAL = [('i', 'i4'), ('t', 'i4'), ('s', _SYMBOL_TYPE), ('x', 'f4'), ('y', 'f4'), ('z', 'f4'),
('e', 'u1')]
('e', 'u1'), ('q', 'f4'), ('c', 'i4')]
## file format of internal Cluster.data array
FMT_CLUSTER_INTERNAL = ["%5u", "%2u", "%s", "%7.3f", "%7.3f", "%7.3f", "%1u"]
FMT_CLUSTER_INTERNAL = ["%5u", "%2u", "%s", "%5u", "%7.3f", "%7.3f", "%7.3f", "%1u", "%7.3f"]
## field (column) names of internal Cluster.data array
FIELDS_CLUSTER_INTERNAL = ['i', 't', 's', 'x', 'y', 'z', 'e']
FIELDS_CLUSTER_INTERNAL = ['i', 't', 's', 'c', 'x', 'y', 'z', 'e', 'q']
## column names for export
NAMES_CLUSTER_INTERNAL = {'i': 'index', 't': 'element', 's': 'symbol', 'c': 'class', 'x': 'x', 'y': 'y', 'z': 'z',
'e': 'emitter', 'q': 'charge'}
## numpy.array datatype of cluster for MSC cluster file input/output
DTYPE_CLUSTER_MSC = [('i', 'i4'), ('x', 'f4'), ('y', 'f4'), ('z', 'f4'), ('t', 'i4')]
@ -57,11 +73,11 @@ FMT_CLUSTER_MSC = ["%5u", "%7.3f", "%7.3f", "%7.3f", "%2u"]
FIELDS_CLUSTER_MSC = ['i', 'x', 'y', 'z', 't']
## numpy.array datatype of cluster for EDAC cluster file input/output
DTYPE_CLUSTER_EDAC= [('i', 'i4'), ('t', 'i4'), ('x', 'f4'), ('y', 'f4'), ('z', 'f4')]
DTYPE_CLUSTER_EDAC= [('i', 'i4'), ('c', 'i4'), ('x', 'f4'), ('y', 'f4'), ('z', 'f4')]
## file format of EDAC cluster file
FMT_CLUSTER_EDAC = ["%5u", "%2u", "%7.3f", "%7.3f", "%7.3f"]
## field (column) names of EDAC cluster file
FIELDS_CLUSTER_EDAC = ['i', 't', 'x', 'y', 'z']
FIELDS_CLUSTER_EDAC = ['i', 'c', 'x', 'y', 'z']
## numpy.array datatype of cluster for XYZ file input/output
DTYPE_CLUSTER_XYZ= [('s', _SYMBOL_TYPE), ('x', 'f4'), ('y', 'f4'), ('z', 'f4')]
@ -70,6 +86,44 @@ FMT_CLUSTER_XYZ = ["%s", "%10.5f", "%10.5f", "%10.5f"]
## field (column) names of XYZ cluster file
FIELDS_CLUSTER_XYZ = ['s', 'x', 'y', 'z']
## numpy.array datatype of cluster for PHAGEN output file input/output
DTYPE_CLUSTER_PHAGEN_OUT = [('i', 'i4'), ('s', _SYMBOL_TYPE), ('t', 'i4'), ('x', 'f4'), ('y', 'f4'), ('z', 'f4'), ('c', 'i4')]
## file format of PHAGEN cluster output file
FMT_CLUSTER_PHAGEN_OUT = ["%5u", "%s", "%2u", "%7.3f", "%7.3f", "%7.3f", "%5u"]
## field (column) names of PHAGEN cluster output file
FIELDS_CLUSTER_PHAGEN_OUT = ['i', 's', 't', 'x', 'y', 'z', 'c']
## numpy.array datatype of cluster for PHAGEN input file input/output
DTYPE_CLUSTER_PHAGEN_IN = [('s', _SYMBOL_TYPE), ('t', 'i4'), ('x', 'f4'), ('y', 'f4'), ('z', 'f4'), ('q', 'f4')]
## file format of PHAGEN input file, cluster section
FMT_CLUSTER_PHAGEN_IN = ["%s", "%2u", "%7.3f", "%7.3f", "%7.3f", "%7.3f"]
## field (column) names of PHAGEN input file, cluster section
FIELDS_CLUSTER_PHAGEN_IN = ['s', 't', 'x', 'y', 'z', 'q']
## dictionary of supported cluster data types
CLUSTER_DTYPES = {FMT_DEFAULT: DTYPE_CLUSTER_INTERNAL,
FMT_MSC: DTYPE_CLUSTER_MSC,
FMT_EDAC: DTYPE_CLUSTER_EDAC,
FMT_XYZ: DTYPE_CLUSTER_XYZ,
FMT_PHAGEN_OUT: DTYPE_CLUSTER_PHAGEN_OUT,
FMT_PHAGEN_IN: DTYPE_CLUSTER_PHAGEN_IN}
## dictionary of supported cluster file formats
CLUSTER_FMTS = {FMT_DEFAULT: FMT_CLUSTER_INTERNAL,
FMT_MSC: FMT_CLUSTER_MSC,
FMT_EDAC: FMT_CLUSTER_EDAC,
FMT_XYZ: FMT_CLUSTER_XYZ,
FMT_PHAGEN_OUT: FMT_CLUSTER_PHAGEN_OUT,
FMT_PHAGEN_IN: FMT_CLUSTER_PHAGEN_IN}
## dictionary of supported cluster field names
CLUSTER_FIELDS = {FMT_DEFAULT: FIELDS_CLUSTER_INTERNAL,
FMT_MSC: FIELDS_CLUSTER_MSC,
FMT_EDAC: FIELDS_CLUSTER_EDAC,
FMT_XYZ: FIELDS_CLUSTER_XYZ,
FMT_PHAGEN_OUT: FIELDS_CLUSTER_PHAGEN_OUT,
FMT_PHAGEN_IN: FIELDS_CLUSTER_PHAGEN_IN}
class Cluster(object):
"""
@ -84,6 +138,8 @@ class Cluster(object):
- y coordinate of the atom position
- z coordinate of the atom position
- emitter flag
- charge/ionicity
- scatterer class
the class also defines methods that add or manipulate atoms of the cluster.
see most importantly the set_rmax, add_atom, add_layer and add_bulk functions.
@ -126,6 +182,8 @@ class Cluster(object):
# @arg @c 'y' (float32) y coordinate of the atom position
# @arg @c 'z' (float32) z coordinate of the atom position
# @arg @c 'e' (uint8) 1 = emitter, 0 = regular atom
# @arg @c 'q' (float32) charge/ionicity
# @arg @c 'c' (int) scatterer class
## @var comment (str)
# one-line comment that can be included in some cluster files
@ -152,6 +210,9 @@ class Cluster(object):
@param cluster: (Cluster) other Cluster object.
"""
self.data = cluster.data.copy()
self.rmax = cluster.rmax
self.dtype = cluster.dtype
self.comment = cluster.comment
def set_rmax(self, r):
"""
@ -166,7 +227,7 @@ class Cluster(object):
"""
self.rmax = r
def build_element(self, index, element_number, x, y, z, emitter):
def build_element(self, index, element_number, x, y, z, emitter, charge=0., scatterer=0):
"""
build a tuple in the format of the internal data array.
@ -177,12 +238,16 @@ class Cluster(object):
@param x, y, z: (float) atom coordinates in the cluster
@param emitter: (int or bool) True = emitter, False = scatterer
@param charge: (float) ionicity. default = 0
@param scatterer: (int) scatterer class. default = 0.
"""
symbol = pt.elements[element_number].symbol
element = (index, element_number, symbol, x, y, z, int(emitter))
element = (index, element_number, symbol, x, y, z, int(emitter), charge, scatterer)
return element
def add_atom(self, atomtype, v_pos, is_emitter):
def add_atom(self, atomtype, v_pos, is_emitter=False, charge=0.):
"""
add a single atom to the cluster.
@ -191,11 +256,15 @@ class Cluster(object):
@param v_pos: (numpy.ndarray, shape = (3)) position vector
@param is_emitter: (int or bool) True = emitter, False = scatterer
@param charge: (float) ionicity. default = 0
@return array index of added atom
"""
n0 = self.data.shape[0] + 1
element = self.build_element(n0, atomtype, v_pos[0], v_pos[1], v_pos[2], int(is_emitter))
self.data = np.append(self.data, np.array(element,
dtype=self.data.dtype))
element = self.build_element(n0, atomtype, v_pos[0], v_pos[1], v_pos[2], is_emitter, charge)
self.data = np.append(self.data, np.array(element, dtype=self.data.dtype))
return n0 - 1
def add_layer(self, atomtype, v_pos, v_lat1, v_lat2):
"""
@ -290,19 +359,21 @@ class Cluster(object):
source = cluster.data.copy()
if check_rmax and source.shape[0] > 0:
source_xyz = source[['x', 'y', 'z']].copy()
source_xyz = source_xyz.view((source_xyz.dtype[0], len(source_xyz.dtype.names)))
source_xyz = cluster.get_positions()
b_rmax = np.linalg.norm(source_xyz, axis=1) <= self.rmax
idx = np.where(b_rmax)
source = source[idx]
data = np.append(data, source)
if check_unique and data.shape[0] > 0:
data_xyz = data[['x', 'y', 'z']].copy()
data_xyz = data_xyz.view((data_xyz.dtype[0], len(data_xyz.dtype.names)))
tol_xyz = np.round(data_xyz / tol)
uni_xyz = tol_xyz.view(tol_xyz.dtype.descr * 3)
_, idx = np.unique(uni_xyz, return_index=True)
data_xyz = np.empty((data.shape[0], 3))
data_xyz[:, 0] = data['x']
data_xyz[:, 1] = data['y']
data_xyz[:, 2] = data['z']
tol *= 2
uni_xyz = np.round(data_xyz / tol)
# this requires numpy 1.13 or later
_, idx = np.unique(uni_xyz, return_index=True, axis=0)
data = data[np.sort(idx)]
self.data = data
@ -322,8 +393,10 @@ class Cluster(object):
the returned coordinates may not be identical to any atom coordinate of a layer
but deviate up to the given tolerance.
"""
self_z = self.data['z'].view(np.float32).reshape(self.data.shape)
z2 = np.round(self_z.copy() / tol)
tol *= 2
self_z = np.empty(self.data.shape, np.float32)
self_z[:] = self.data['z']
z2 = np.round(self_z / tol)
layers = np.unique(z2) * tol
return layers
@ -338,7 +411,8 @@ class Cluster(object):
by default (element = 0), all atoms are moved.
@return: (numpy.ndarray) indices of the atoms that have been shifted.
"""
self_z = self.data['z'].view(np.float32).reshape(self.data.shape)
self_z = np.empty(self.data.shape, np.float32)
self_z[:] = self.data['z']
b_z = self_z <= z_cut
b_all = b_z
@ -434,12 +508,18 @@ class Cluster(object):
"""
find all atoms which occupy a given position.
@param pos: (numpy.array, shape = (3)) position vector.
@param pos: position vector.
this can be a numpy.ndarray with shape (3)
or any type where pos[0] represents the x-coordinate, pos[1] y, and pos[2] z.
@param tol: (float) matching tolerance per coordinate.
@return numpy.array of indices which match v_pos.
"""
if isinstance(pos, np.ndarray):
assert pos.shape == (3,)
else:
pos = np.array((pos[0], pos[1], pos[2]))
b2 = np.abs(pos - self.get_positions()) < tol
b1 = np.all(b2, axis=1)
idx = np.where(b1)
@ -463,8 +543,9 @@ class Cluster(object):
@return numpy.array of indices which match v_pos.
"""
pos_xy = pos[0:2]
self_xy = self.data[['x', 'y']].copy()
self_xy = self_xy.view((self_xy.dtype[0], len(self_xy.dtype.names)))
self_xy = np.empty((self.data.shape[0], 2), np.float32)
self_xy[:, 0] = self.data['x']
self_xy[:, 1] = self.data['y']
b_xy = np.linalg.norm(self_xy - pos_xy, axis=1) <= r_xy
pos_z = pos[2]
@ -497,8 +578,9 @@ class Cluster(object):
@return: None
"""
self_xy = self.data[['x', 'y']].copy()
self_xy = self_xy.view((self_xy.dtype[0], len(self_xy.dtype.names)))
self_xy = np.empty((self.data.shape[0], 2), np.float32)
self_xy[:, 0] = self.data['x']
self_xy[:, 1] = self.data['y']
b_xy = np.linalg.norm(self_xy, axis=1) <= r_xy
self_z = self.data['z']
@ -545,8 +627,7 @@ class Cluster(object):
@return: None
"""
self_xyz = self.data[['x', 'y', 'z']].copy()
self_xyz = self_xyz.view((self_xyz.dtype[0], len(self_xyz.dtype.names)))
self_xyz = self.get_positions()
b_xyz = np.linalg.norm(self_xyz, axis=1) <= radius
idx = np.where(b_xyz)
self.data = self.data[idx]
@ -562,7 +643,8 @@ class Cluster(object):
@return: None
"""
coord = self.data[axis].view(np.float32).reshape(self.data.shape)
coord = np.empty(self.data.shape, np.float32)
coord[:] = self.data[axis]
sel = np.abs(coord - center) <= depth / 2
idx = np.where(sel)
self.data = self.data[idx]
@ -617,15 +699,17 @@ class Cluster(object):
def get_positions(self):
    """
    get the atom coordinates in a two-dimensional array.

    the returned array is an independent copy of the original data.
    changes will not affect the original cluster.

    the array is filled column by column from the 'x', 'y' and 'z' fields.
    this does not rely on re-interpreting a multi-field selection via ndarray.view.

    @return numpy.ndarray, shape = (N,3), dtype = float32.
        the columns hold the x, y and z coordinates in this order.
    """
    pos = np.empty((self.data.shape[0], 3), np.float32)
    for column, field in enumerate(('x', 'y', 'z')):
        pos[:, column] = self.data[field]
    return pos
def set_positions(self, positions):
@ -689,14 +773,16 @@ class Cluster(object):
rec = self.data[index]
return rec['s']
def get_emitters(self, fields=('x', 'y', 'z', 't')):
    """
    get a list of all emitters.

    an atom is considered an emitter if its 'e' field is non-zero.

    @param fields: sequence of field (column) names to return.
        the default ('x', 'y', 'z', 't') keeps calls without arguments working.
    @return list of tuples. each tuple contains the values of the requested fields
        of one emitter, in the order given by fields.
    """
    is_emitter = self.data['e'] != 0
    # numpy selects multiple fields only if the names are given in a list.
    ems = self.data[list(fields)][is_emitter]
    return [tuple(em) for em in ems]
def get_emitter_count(self):
@ -711,10 +797,22 @@ class Cluster(object):
def load_from_file(self, f, fmt=FMT_DEFAULT):
"""
load a cluster from a file created by the scattering program.
the file formats differ in the columns that they contain.
only the 'x', 'y', 'z' coordinates are common to all formats.
at least one of the 's' and 't' columns must be present.
missing columns are initialized as follows.
@arg 'i': reset to a 1-based sequential index (@ref update_index).
@arg 's': derived from the 't' column (@ref update_symbols).
@arg 't': derived from the 's' column (@ref update_atomtypes).
@arg 'e': set to 0.
@arg 'c': set equal to the 't' column (@ref init_atomclasses).
@arg 'q': set to 0.
@param f (string/handle): path name or open file handle of the cluster file.
@param f: path name or open file handle of the cluster file.
@param fmt (int): file format.
@param fmt: file format.
must be one of the FMT_ constants.
if FMT_DEFAULT, self.file_format is used.
@ -735,12 +833,25 @@ class Cluster(object):
dtype = DTYPE_CLUSTER_XYZ
fields = FIELDS_CLUSTER_XYZ
sh = 2
elif fmt == FMT_PHAGEN_OUT:
dtype = DTYPE_CLUSTER_PHAGEN_OUT
fields = FIELDS_CLUSTER_PHAGEN_OUT
sh = 1
elif fmt == FMT_PHAGEN_IN:
dtype = DTYPE_CLUSTER_PHAGEN_IN
fields = FIELDS_CLUSTER_PHAGEN_IN
sh = 0
elif fmt == FMT_PMSCO:
dtype = DTYPE_CLUSTER_INTERNAL
fields = FIELDS_CLUSTER_INTERNAL
sh = 1
else:
dtype = DTYPE_CLUSTER_XYZ
fields = FIELDS_CLUSTER_XYZ
sh = 2
raise ValueError("unknown file format {}".format(fmt))
data = np.genfromtxt(f, dtype=dtype, skip_header=sh)
if fmt == FMT_PHAGEN_IN and data['t'][-1] < 1:
data = data[:-1]
self.data = np.empty(data.shape, dtype=self.dtype)
self.data['x'] = data['x']
self.data['y'] = data['y']
@ -753,14 +864,23 @@ class Cluster(object):
self.data['t'] = data['t']
if 's' in fields:
self.data['s'] = data['s']
else:
elif 't' in fields:
self.update_symbols()
if 't' not in fields:
self.update_atomtypes()
if 's' in fields:
self.update_atomtypes()
if 'e' in fields:
self.data['e'] = data['e']
else:
self.data['e'] = 0
if 'c' in fields:
self.data['c'] = data['c']
else:
self.data['c'] = 0
if 'q' in fields:
self.data['q'] = data['q']
else:
self.data['q'] = 0.
pos = self.get_positions()
# note: np.linalg.norm does not accept axis argument in version 1.7
@ -788,6 +908,35 @@ class Cluster(object):
for atom in self.data:
atom['t'] = pt.elements.symbol(atom['s'].strip()).number
def init_atomclasses(self, field_or_value='t', default_only=False):
    """
    set the atom class column 'c' from another column or from a constant.

    atom classes identify the atomic scattering potential or scattering factors
    to be used in the multiple scattering program.

    if the scattering factors are calculated in the PMSCO process (by EDAC or PHAGEN),
    the atom classes must be set equal to the element type
    or left at the default value 0 in which case PMSCO sets the correct values.

    if the scattering factors are loaded from existing files,
    the atom class corresponds to the key of the pmsco.project.Params.phase_files dictionary.
    in this case the meaning of the class value is up to the project,
    and the class must be set either by the cluster generator
    or the project's after_atomic_scattering hook.

    @param field_or_value: a string is taken as the name of a cluster data field, e.g. 't',
        whose values are copied. any other value is assigned to all atoms as a constant.
    @param default_only: if True, the classes are written only if all of them
        are at their default value (0). otherwise they are always overwritten.
    @return None
    """
    # with default_only, any non-zero class means that classes have been set already.
    if default_only and np.sum(np.abs(self.data['c'])) != 0:
        return

    if isinstance(field_or_value, str):
        new_classes = self.data[field_or_value]
    else:
        new_classes = field_or_value
    self.data['c'] = new_classes
def update_index(self):
"""
update the index column.
@ -795,10 +944,44 @@ class Cluster(object):
if you have modified the order or number of elements in the self.data array directly,
you may need to re-index the atoms if your code uses functions that rely on the index.
@return: None
@return None
"""
self.data['i'] = np.arange(1, self.data.shape[0] + 1)
def update_atoms(self, clu, fields):
    """
    update atom properties from another cluster.

    this method copies selected fields from another cluster.
    the other cluster must contain the same atoms (same coordinates) in a possibly random order.
    the atoms of this and the other cluster are matched up by sorting them by coordinate.

    atomic scattering calculators often change the order of atoms in a cluster based on symmetry,
    and return atom classes versus atomic coordinates.
    this method allows to import the atom classes into the original cluster.

    the method checks that the other cluster contains the same number of atoms.
    it does not check that the clusters contain the same atomic positions.
    linear translations are acceptable.

    @param clu: cluster.Cluster object
    @param fields: subset of field names out of FIELDS_CLUSTER_INTERNAL.
        'i', 'x', 'y', 'z' are ignored.
        the set can be specified in any type that converts into a set of strings.
    @return: None
    @raise AssertionError if the clusters do not contain the same number of atoms.
    """
    # explicit raise instead of an assert statement:
    # the check must not disappear when python runs with the -O option.
    if self.data.shape != clu.data.shape:
        raise AssertionError("clusters contain different numbers of atoms: {0} versus {1}".format(
            self.data.shape, clu.data.shape))

    fields = set(fields) - {'i', 'x', 'y', 'z'}
    # sorting both clusters by the same coordinate keys pairs up corresponding atoms.
    common_order = ('z', 'y', 'x')
    index_self = np.argsort(self.data, order=common_order)
    index_other = np.argsort(clu.data, order=common_order)
    for field in fields:
        self.data[field][index_self] = clu.data[field][index_other]
def save_to_file(self, f, fmt=FMT_DEFAULT, comment="", emitters_only=False):
"""
save the cluster to a file which can be read by the scattering program.
@ -846,10 +1029,21 @@ class Cluster(object):
file_format = FMT_CLUSTER_XYZ
fields = FIELDS_CLUSTER_XYZ
header = "{nat}\n{com}".format(nat=data.shape[0], com=comment)
elif fmt == FMT_PHAGEN_IN:
file_format = FMT_CLUSTER_PHAGEN_IN
fields = FIELDS_CLUSTER_PHAGEN_IN
header = None
elif fmt == FMT_PHAGEN_OUT:
file_format = FMT_CLUSTER_PHAGEN_OUT
fields = FIELDS_CLUSTER_PHAGEN_OUT
header = ""
elif fmt == FMT_PMSCO:
file_format = FMT_CLUSTER_INTERNAL
fields = FIELDS_CLUSTER_INTERNAL
names = NAMES_CLUSTER_INTERNAL
header = "# " + " ".join([names[field] for field in fields])
else:
file_format = FMT_CLUSTER_XYZ
fields = FIELDS_CLUSTER_XYZ
header = "{nat}\n{com}".format(nat=data.shape[0], com=comment)
raise ValueError("unknown file format {}".format(fmt))
data = data[fields]
np.savetxt(f, data, fmt=file_format, header=header, comments="")
@ -996,3 +1190,67 @@ class LegacyClusterGenerator(ClusterGenerator):
redirect the call to the corresponding project method.
"""
return self.project.create_cluster(model, index)
def parse_cli(argv=None):
    """
    parse the command line

    the command line consists of four positional arguments:
    input format, input file, output format and output file.
    the formats must be one of PMSCO, MSC, EDAC, XYZ, PHAGEN_OUT, PHAGEN_IN.

    @param argv: list of argument strings (without the program name).
        by default (None), the arguments are taken from sys.argv.
        passing an explicit list allows to use the parser from other code.
    @return: Namespace object created by the argument parser.
        it has the attributes input_format, input_file, output_format and output_file.
    """
    import argparse

    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        description="""
cluster conversion
""")
    format_choices = ["PMSCO", "MSC", "EDAC", "XYZ", "PHAGEN_OUT", "PHAGEN_IN"]
    parser.add_argument('input_format',
                        choices=format_choices,
                        help="format of input file")
    parser.add_argument('input_file',
                        help="path and name of input file")
    parser.add_argument('output_format',
                        choices=format_choices,
                        help="format of output file")
    parser.add_argument('output_file',
                        help="path and name of output file")

    # argparse falls back to sys.argv[1:] if argv is None.
    args = parser.parse_args(argv)
    return args
def convert_cli(args):
    """
    load a cluster file in one format and save it in another.

    this function is part of the command line interface.
    the format names given on the command line are mapped
    to the module-level FMT_ constants of the same (upper case) name.

    @param args: command line arguments.
        the function reads the input_format, input_file, output_format and output_file attributes.
    @return: None
    """
    def lookup_format(name):
        # e.g. "xyz" -> value of the module-level constant FMT_XYZ
        return globals()["FMT_" + name.upper()]

    cluster = Cluster()
    cluster.file_format = FMT_PMSCO
    source_format = lookup_format(args.input_format)
    target_format = lookup_format(args.output_format)
    cluster.load_from_file(args.input_file, source_format)
    cluster.save_to_file(args.output_file, target_format)
def main_cli():
    """
    entry point of the command line interface for converting cluster files.

    parses the command line and hands the result over to @ref convert_cli.

    @return: None
    """
    convert_cli(parse_cli())
if __name__ == '__main__':
main_cli()
sys.exit(0)

View File

@ -342,6 +342,53 @@ class ResultsDatabase(object):
where param_id = :param_id and model_id = :model_id
"""
sql_create_tags = """CREATE TABLE IF NOT EXISTS `Tags` (
`id` INTEGER PRIMARY KEY,
`key` TEXT NOT NULL UNIQUE COLLATE NOCASE
)"""
sql_insert_tag = "insert into Tags(key) values (:key)"
sql_select_tag = "select key from Tags where id=:id"
sql_select_tag_key = "select id, key from Tags where key=:key"
sql_select_tag_project = """select distinct key, tag_id from Jobs
join JobTags on Jobs.id = JobTags.job_id
join Tags on Tags.id = JobTags.tag_id
where Jobs.project_id = :project_id
order by key collate nocase"""
sql_select_tag_job = """select distinct key, tag_id from JobTags
join Tags on Tags.id = JobTags.tag_id
where JobTags.job_id = :job_id
order by key collate nocase"""
sql_create_jobtags = """CREATE TABLE IF NOT EXISTS `JobTags` (
`id` INTEGER PRIMARY KEY,
`tag_id` INTEGER NOT NULL,
`job_id` INTEGER NOT NULL,
`value` TEXT COLLATE NOCASE,
FOREIGN KEY(tag_id) REFERENCES Tags(id) ON DELETE CASCADE,
FOREIGN KEY(job_id) REFERENCES Jobs(id) ON DELETE CASCADE
)"""
sql_index_jobtags = """create index if not exists
`index_jobtags` ON `JobTags`
(`tag_id`, `job_id`)"""
sql_drop_index_jobtags = "drop index if exists index_jobtags"
sql_insert_jobtag = """
insert into JobTags(tag_id, job_id, value)
values (:tag_id, :job_id, :value)
"""
sql_update_jobtag = """
update JobTags set value=:value where id=:jobtag_id
"""
sql_select_jobtag_job = """
select key, value from JobTags
join Tags on JobTags.tag_id = Tags.id
where job_id = :job_id
"""
sql_select_jobtag = """
select JobTags.id as id, key, value from JobTags
join Tags on JobTags.tag_id = Tags.id
where tag_id = :tag_id and job_id = :job_id
"""
# @var _conn (sqlite3.Connection).
# connection interface to the database.
#
@ -391,6 +438,7 @@ class ResultsDatabase(object):
self.project_id = 0
self.job_id = 0
self._model_params = {}
self._tags = {}
self._lock_filename = ""
self._lock = None
@ -484,9 +532,12 @@ class ResultsDatabase(object):
self._conn.execute(self.sql_create_results)
self._conn.execute(self.sql_create_params)
self._conn.execute(self.sql_create_paramvalues)
self._conn.execute(self.sql_create_tags)
self._conn.execute(self.sql_create_jobtags)
self._conn.execute(self.sql_index_results_tasks)
self._conn.execute(self.sql_index_results_models)
self._conn.execute(self.sql_index_paramvalues)
self._conn.execute(self.sql_index_jobtags)
self._conn.execute(self.sql_index_models)
def register_project(self, name, code):
@ -583,6 +634,46 @@ class ResultsDatabase(object):
param_dict = {'job_id': job_id}
self._conn.execute(self.sql_delete_job, param_dict)
def _query_job_name(self, job_name, project_id=0):
    """
    (internal) look up the id of a job by its name.

    this is the internal counterpart of @ref query_job_name.
    it uses self._conn directly and does not check the connection or acquire the lock.
    the caller is responsible for that.

    @param job_name: name of the job
    @param project_id: project identifier.
        by default (0), the current project self.project_id is used.
    @return: first column of the first row returned by the sql_select_job_name query.
    @raise TypeError if the query returns no row
        (the missing row is None and cannot be indexed).
    """
    selected_project = self.project_id if project_id == 0 else project_id
    cursor = self._conn.execute(self.sql_select_job_name,
                                {'project_id': selected_project, 'name': job_name})
    row = cursor.fetchone()
    return row[0]
def query_job_name(self, job_name, project_id=0):
    """
    look up the id of a job by its name.

    the method checks the connection, then runs @ref _query_job_name
    while holding the lock and the connection context.

    @param job_name: name of the job
    @param project_id: project identifier.
        the value is passed on to _query_job_name unchanged.
    @return: id value of the job as returned by _query_job_name.
    """
    self.check_connection()
    with self._lock, self._conn:
        return self._query_job_name(job_name, project_id=project_id)
def register_param(self, key):
"""
register a parameter key with the database.
@ -681,6 +772,165 @@ class ResultsDatabase(object):
return params
def register_tag(self, key):
    """
    register a tag with the database.

    tags are a way of structuring a job description.
    they can be used to, for instance, distinguish calculations made with different clusters,
    different experimental data, etc.
    a job tag has a key and a value, and is associated to a job.
    the use of tags is up to the user. pmsco does not change or read them.

    each tag name must be registered once before a value can be written to the database.
    see the class description for an explanation.

    the method checks the connection and calls _register_tag
    while holding the lock and the connection context.

    @param key: key (name) of the tag.
    @return: id value of the tag in the database.
    """
    self.check_connection()
    with self._lock, self._conn:
        tag_id = self._register_tag(key)
    return tag_id
def _register_tag(self, key):
    """
    register a tag with the database without committing the transaction.

    if the key is found by the sql_select_tag_key query, its existing id is used.
    otherwise, a new row is inserted and the id of the new row is used.
    in both cases, the id is stored in the local registry self._tags under the given key.

    @note this method does not lock the database file and does not commit.
    to lock the database and commit the transaction, call the public method register_tag().

    @param key: key (name) of the tag.
    @return: id value of the tag in the database.
    """
    key_args = {'key': key}
    existing = self._conn.execute(self.sql_select_tag_key, key_args).fetchone()
    if existing:
        tag_id = existing[0]
    else:
        tag_id = self._conn.execute(self.sql_insert_tag, key_args).lastrowid
    self._tags[key] = tag_id
    return tag_id
def register_tags(self, tags):
    """
    register several tags with the database in one transaction.

    each tag name must be registered once before a value can be written to the database.
    see the class description for an explanation.

    the method checks the connection and calls _register_tag for each key
    while holding the lock and the connection context.

    @param tags: sequence of tag keys, or dictionary of tags.
        the object is iterated directly, i.e. for a dictionary only the keys are used.
    @return: None
    """
    self.check_connection()
    with self._lock, self._conn:
        for tag_key in tags:
            self._register_tag(tag_key)
def query_tags(self, project_id=0, job_id=0, update_registry=False):
    """
    query the tag keys used in a project or job.

    optionally, the local registry can be updated with the results of the query.
    this should be done if the database is read only and the client does not know the tag names.
    see the class description for a description of the registry.

    @note this method returns the tags that are used with jobs in the database.
    if you have registered additional tags but not attached them to jobs,
    this method will _not_ list them.

    @param project_id: project identifier.
        by default (0), the current project self.project_id is used.
    @param job_id: job identifier.
        by default (0), all jobs of the selected project are included in the query.
        if a job is specified, the project_id parameter is ignored.
    @param update_registry: if True, merge the query results
        into the local tags registry (self._tags).
    @return: dictionary of tags.
        the keys are the tag names, the values are the tag ids in the database.
    """
    if project_id == 0:
        project_id = self.project_id

    # a job id takes precedence over the project id.
    if job_id != 0:
        sql, args = self.sql_select_tag_job, {'job_id': job_id}
    else:
        sql, args = self.sql_select_tag_project, {'project_id': project_id}

    self.check_connection()
    with self._lock, self._conn:
        cursor = self._conn.execute(sql, args)
        tags = {row['key']: row['tag_id'] for row in cursor}

    if update_registry:
        self._tags.update(tags)
    return tags
def query_job_tags(self, job_id):
    """
    query the tags (keys and values) that are attached to a job.

    the method checks the connection and runs the sql_select_jobtag_job query
    while holding the lock and the connection context.

    @param job_id: job identifier.
    @return: dictionary of tags.
        the keys are the tag names, the values are the tag values.
    """
    self.check_connection()
    with self._lock, self._conn:
        cursor = self._conn.execute(self.sql_select_jobtag_job, {'job_id': job_id})
        tags = {row['key']: row['value'] for row in cursor}
    return tags
def insert_jobtags(self, job_id, tags):
    """
    add or update job tags in the database.

    the method updates the JobTags table.
    for each tag, the tag id is taken from the local registry (self._tags)
    or, if the key is not in the registry, from _register_tag.
    if the job already has an entry for the tag, its value is updated.
    otherwise a new entry is inserted.

    the existing entry is looked up in the database for every tag,
    also if the key was not in the local registry.
    the registry of this object can be empty although the tag is attached to the job
    in the database (e.g. if the database was written in an earlier session).
    inserting without the lookup would duplicate the entry.

    @param job_id: (int) primary key of the job entry in the Jobs table.
        the entry must exist.
    @param tags: (dict) dictionary containing the tags.
        keys are matched or added to the Tags table,
        values are added to the JobTags table and linked to the job and tag key.
    @return: None
    """
    self.check_connection()
    with self._lock, self._conn:
        for key, value in tags.items():
            try:
                tag_id = self._tags[key]
            except KeyError:
                tag_id = self._register_tag(key)

            c = self._conn.execute(self.sql_select_jobtag, {'tag_id': tag_id, 'job_id': job_id})
            v = c.fetchone()

            if v:
                # first column of the sql_select_jobtag query is the id of the JobTags row.
                jobtag_entry = {'jobtag_id': v[0], 'value': value}
                self._conn.execute(self.sql_update_jobtag, jobtag_entry)
            else:
                jobtag_entry = {'tag_id': tag_id, 'job_id': job_id, 'value': value}
                self._conn.execute(self.sql_insert_jobtag, jobtag_entry)
def create_models_view(self, job_id=0, temporary=False):
"""
create a flat (pivot) view of model parameters of the current project or job.
@ -878,7 +1128,7 @@ class ResultsDatabase(object):
results = c.fetchall()
names = [desc[0] for desc in c.description]
dt = np.dtype([(field_to_param(n), field_to_numpy_type(n)) for n in sorted(names)])
dt = np.dtype([(field_to_param(n), field_to_numpy_type(n)) for n in sorted(names, key=str.lower)])
out_array = np.zeros((count,), dtype=dt)
for idx, row in enumerate(results):
for name in names:
@ -942,6 +1192,70 @@ class ResultsDatabase(object):
return out_array
def query_best_models_per_jobs(self, job_ids=None, task_level='model'):
    """
    return the best model (by rfac) of each selected job

    the query gathers the R-factors of the selected jobs at the selected task levels
    and, for each job, returns the (database) model id where the lowest R-factor is reported
    among the gathered results.

    this can be useful if you want to compile a report of the best model per job.

    @param job_ids: iterable of job ids to include in the query.
        the job ids must belong to the current project.
        if empty or non-specified, all jobs of the current project are included.
        each id is converted to int before it is inserted into the query.

    @param task_level: element of or index into @ref pmsco.dispatch.CALC_LEVELS.
        deepest task_level to include in the query.
        results on deeper levels are not considered.
        e.g. if you pass 'scan', R-factors of individual scans are included in the query.
        note that including deeper levels will not increase the number of results returned.

    @return sequence of model_id.
        the number of results corresponds to the number of jobs in the filter scope.
        to find out details of the models, execute another query that filters on these model ids.

    @raise ValueError (or TypeError) if an element of job_ids can not be converted to int.

    the method produces an SQL query similar to:
    @code{.sql}
    select Models.id from Models
    join Results on Models.id = Results.model_id
    join Jobs on Models.job_id = Jobs.id
    where scan=-1
    and project_id=1
    and job_id in (1,2,3)
    group by Models.job_id
    having min(rfac)
    order by rfac
    @endcode
    """
    # the filter is applied on the level below the requested one:
    # a value of -1 in that column selects results that are not resolved on deeper levels.
    try:
        level = dispatch.CALC_LEVELS.index(task_level) + 1
    except ValueError:
        level = task_level + 1
    try:
        level_name = dispatch.CALC_LEVELS[level]
    except IndexError:
        level_name = dispatch.CALC_LEVELS[4]

    self.check_connection()
    with self._lock, self._conn:
        sql = "select Models.id from Models "
        sql += "join Results on Models.id = Results.model_id "
        sql += "join Jobs on Models.job_id = Jobs.id "
        # the project id is bound as a parameter rather than formatted into the statement.
        sql += "where project_id = :project_id "
        # level_name is an element of CALC_LEVELS, not user input. column names can not be bound.
        sql += "and {0} = -1 ".format(level_name)
        if job_ids:
            # int() makes sure that only numbers end up in the statement text.
            id_list = ",".join(str(int(job_id)) for job_id in job_ids)
            sql += "and Models.job_id in ({0}) ".format(id_list)
        sql += "group by Models.job_id "
        # NOTE(review): 'having min(rfac)' is evaluated as a boolean condition.
        # a job whose lowest rfac is exactly 0 (or NULL) would presumably be dropped - confirm intent.
        sql += "having min(rfac) "
        sql += "order by rfac, job_id, model, scan, sym, emit, region "
        c = self._conn.execute(sql, {'project_id': self.project_id})
        models = [row['id'] for row in c]

    return models
def query_tasks(self, job_id=0):
"""
query the task index used in a calculation job.
@ -1213,13 +1527,18 @@ class ResultsDatabase(object):
data = np.genfromtxt(filename, names=True)
self.register_params(data.dtype.names)
unique_models, unique_index = np.unique(data['_model'], True)
try:
unique_models, unique_index = np.unique(data['_model'], True)
except ValueError:
unique_models = np.array([0])
unique_index = np.array([0])
unique_data = data[unique_index]
model_ids = {}
def model_entry_generator():
for result in unique_data:
model_entry = {'job_id': job_id,
'model': unique_models[0],
'gen': None,
'particle': None}
model_entry.update(special_params(result))
@ -1227,7 +1546,11 @@ class ResultsDatabase(object):
def result_entry_generator():
for result in data:
result_entry = {'model_id': model_ids[result['_model']],
try:
model = result['_model']
except ValueError:
model = unique_models[0]
result_entry = {'model_id': model_ids[model],
'scan': -1,
'sym': -1,
'emit': -1,
@ -1238,8 +1561,12 @@ class ResultsDatabase(object):
def param_entry_generator():
for result in unique_data:
try:
model = result['_model']
except ValueError:
model = unique_models[0]
for key, value in regular_params(result).items():
param_entry = {'model_id': model_ids[result['_model']],
param_entry = {'model_id': model_ids[model],
'param_id': self._model_params[key],
'value': value}
yield param_entry

View File

@ -227,7 +227,7 @@ class CalculationTask(object):
# files generated by the task and their category
#
# dictionary key is the file name,
# value is the file category, e.g. 'cluster', 'phase', etc.
# value is the file category, e.g. 'cluster', 'atomic', etc.
#
# this information is used to automatically clean up unnecessary data files.
@ -374,7 +374,7 @@ class CalculationTask(object):
this information is used to automatically clean up unnecessary data files.
@param name: file name (optionally including a path).
@param category: file category, e.g. 'cluster', 'phase', etc.
@param category: file category, e.g. 'cluster', 'atomic', etc.
@return: None
"""
self.files[name] = category
@ -521,7 +521,8 @@ class MscoProcess(object):
def __init__(self, comm):
self._comm = comm
self._project = None
self._calculator = None
self._atomic_scattering = None
self._multiple_scattering = None
self._running = False
self._finishing = False
self.stop_signal = False
@ -529,7 +530,8 @@ class MscoProcess(object):
def setup(self, project):
self._project = project
self._calculator = project.calculator_class()
self._atomic_scattering = project.atomic_scattering_factory()
self._multiple_scattering = project.multiple_scattering_factory()
self._running = False
self._finishing = False
self.stop_signal = False
@ -596,19 +598,18 @@ class MscoProcess(object):
scan = self._define_scan(task)
output_file = task.format_filename(ext="")
# check parameters and call the msc program
if clu.get_atom_count() < 2:
# check parameters and call the calculators
if clu.get_atom_count() >= 1:
self._calc_atomic(task, par, clu, scan, output_file)
else:
logger.error("empty cluster in calculation %s", s_id)
task.result_valid = False
elif clu.get_emitter_count() < 1:
if clu.get_emitter_count() >= 1:
self._calc_multiple(task, par, clu, scan, output_file)
else:
logger.error("no emitters in cluster of calculation %s.", s_id)
task.result_valid = False
else:
task.result_filename, files = self._calculator.run(par, clu, scan, output_file)
(root, ext) = os.path.splitext(task.result_filename)
task.file_ext = ext
task.result_valid = True
task.files.update(files)
task.time = datetime.datetime.now() - start_time
@ -666,6 +667,8 @@ class MscoProcess(object):
"""
nem = self._project.cluster_generator.count_emitters(task.model, task.id)
clu = self._project.cluster_generator.create_cluster(task.model, task.id)
# overwrite atom classes only if they are at their default value
clu.init_atomclasses(field_or_value='t', default_only=True)
if task.id.region == 0:
file_index = task.id._replace(region=-1)
@ -696,6 +699,59 @@ class MscoProcess(object):
return par
def _calc_atomic(self, task, par, clu, scan, output_file):
"""
calculate the atomic scattering factors if necessary and link them to the cluster.
the method first calls the `before_atomic_scattering` project hook,
the atomic scattering calculator,
and finally the `after_atomic_scattering` hook.
this process updates the par and clu objects to link to the created files.
if any of the functions returns None, the par and clu objects are left unchanged.
the hooks and the calculator operate on deep copies of par and clu.
the originals are modified only in the last step
(par.phase_files is reassigned and clu.copy_from is called).
@param task: CalculationTask with all attributes set for the calculation.
files reported by the calculator are added to task.files.
@param par: pmsco.project.Params object for the calculator.
its phase_files attribute is updated with the created scattering files.
the radial matrix elements are not changed (but may be in a future version).
@param clu: pmsco.cluster.Cluster object for the calculator.
the cluster is overwritten with the one returned by the calculator,
so that atom classes match the phase_files.
@param scan: scan argument that is passed on to the run method of the atomic scattering calculator.
@param output_file: output file argument that is passed on to the run method of the atomic scattering calculator.
@return: None
"""
# work on copies so that the caller's objects are not touched before the results are accepted.
_par = copy.deepcopy(par)
_clu = copy.deepcopy(clu)
# the hook returns the (possibly modified or replaced) parameters and cluster.
_par, _clu = self._project.before_atomic_scattering(task, _par, _clu)
if _clu is not None:
# the returned file name is not used here. only the reported files are recorded with the task.
filename, files = self._atomic_scattering.run(_par, _clu, scan, output_file)
if files:
task.files.update(files)
_par, _clu = self._project.after_atomic_scattering(task, _par, _clu)
if _clu is not None:
# accept the results: hand the phase files and the cluster contents over to the caller's objects.
par.phase_files = _par.phase_files
clu.copy_from(_clu)
def _calc_multiple(self, task, par, clu, scan, output_file):
    """
    calculate the multiple scattering intensity.

    the method runs the multiple scattering calculator and stores the outcome in the task:
    result_filename receives the file name returned by the calculator.
    if the file name is not empty, file_ext is set to its extension and result_valid is set to True.
    files reported by the calculator are added to task.files.

    @param task: CalculationTask with all attributes set for the calculation.
    @param par: pmsco.project.Params object for the calculator.
    @param clu: pmsco.cluster.Cluster object for the calculator.
    @param scan: scan argument that is passed on to the run method of the calculator.
    @param output_file: output file argument that is passed on to the run method of the calculator.
    @return: None
    """
    run_result = self._multiple_scattering.run(par, clu, scan, output_file)
    task.result_filename, created_files = run_result

    if task.result_filename:
        task.file_ext = os.path.splitext(task.result_filename)[1]
        task.result_valid = True

    if created_files:
        task.files.update(created_files)
class MscoMaster(MscoProcess):
"""
@ -1025,19 +1081,19 @@ class MscoMaster(MscoProcess):
@return: self._finishing
"""
if not self._finishing and (self._model_done and not self._pending_tasks and not self._running_tasks):
logger.info("finish: model handler is done")
logger.warning("finish: model handler is done")
self._finishing = True
if not self._finishing and (self._calculations >= self.max_calculations):
logger.warning("finish: max. calculations (%u) exeeded", self.max_calculations)
self._finishing = True
if not self._finishing and self.stop_signal:
logger.info("finish: stop signal received")
logger.warning("finish: stop signal received")
self._finishing = True
if not self._finishing and (datetime.datetime.now() > self.datetime_limit):
logger.warning("finish: time limit exceeded")
self._finishing = True
if not self._finishing and os.path.isfile("finish_pmsco"):
logger.info("finish: finish_pmsco file detected")
logger.warning("finish: finish_pmsco file detected")
self._finishing = True
if self._finishing and not self._running_slaves and not self._running_tasks:

View File

@ -27,9 +27,9 @@ logger = logging.getLogger(__name__)
#
# each string of this set marks a category of files.
#
# @arg @c 'input' : raw input files for calculator, including cluster and phase files in custom format
# @arg @c 'input' : raw input files for calculator, including cluster and atomic files in custom format
# @arg @c 'output' : raw output files from calculator
# @arg @c 'phase' : phase files in portable format for report
# @arg @c 'atomic' : atomic scattering (phase, emission) files in portable format
# @arg @c 'cluster' : cluster files in portable XYZ format for report
# @arg @c 'log' : log files
# @arg @c 'debug' : debug files
@ -47,7 +47,7 @@ logger = logging.getLogger(__name__)
# the string is used only to specify whether bad models should be deleted or not.
# if so, all files related to bad models are deleted, regardless of their static category.
#
FILE_CATEGORIES = {'cluster', 'phase', 'input', 'output',
FILE_CATEGORIES = {'cluster', 'atomic', 'input', 'output',
'report', 'region', 'emitter', 'scan', 'symmetry', 'model',
'log', 'debug', 'population', 'rfac'}

View File

@ -36,7 +36,7 @@ except ImportError:
logger.warning("error importing matplotlib. graphics rendering disabled.")
def render_1d_scan(filename, data, scan_mode, canvas=None, is_modf=False):
def render_1d_scan(filename, data, scan_mode, canvas=None, is_modf=False, ref_data=None):
"""
produce a graphics file from a one-dimensional scan file.
@ -47,14 +47,22 @@ def render_1d_scan(filename, data, scan_mode, canvas=None, is_modf=False):
@param filename: path and name of the scan file.
this is used to derive the output file path by adding the extension of the graphics file format.
@param data: numpy-structured array of EI, ETPI or ETPAI data.
@param scan_mode: list containing the field name of the scanning axis of the data array.
it must contain one element exactly.
@param canvas: a FigureCanvas class reference from a matplotlib backend.
if None, the default FigureCanvasAgg is used which produces a bitmap file in PNG format.
@param is_modf: whether data contains a modulation function (True) or intensity (False, default).
this parameter is used to set axis labels.
@param ref_data: numpy-structured array of EI, ETPI or ETPAI data.
this is reference data (e.g. experimental data) that should be plotted with the main dataset.
both datasets will be plotted on the same axis and should have similar data range.
@return (str) path and name of the generated graphics file.
empty string if an error occurred.
@ -66,6 +74,8 @@ def render_1d_scan(filename, data, scan_mode, canvas=None, is_modf=False):
canvas(fig)
ax = fig.add_subplot(111)
if ref_data is not None:
ax.plot(ref_data[scan_mode[0]], ref_data['i'], 'k.')
ax.plot(data[scan_mode[0]], data['i'])
ax.set_xlabel(scan_mode[0])
@ -225,7 +235,7 @@ def render_tp_scan(filename, data, canvas=None, is_modf=False):
return out_filename
def render_scan(filename, data=None):
def render_scan(filename, data=None, ref_data=None):
"""
produce a graphics file from a scan file.
@ -248,6 +258,11 @@ def render_scan(filename, data=None):
@param data: numpy-structured array of ETPI or ETPAI data.
if this argument is omitted, the data is loaded from the file referenced by the filename argument.
@param ref_data: numpy-structured array of ETPI or ETPAI data.
this is reference data (e.g. experimental data) that should be plotted with the main dataset.
this is supported for 1d scans only.
both datasets will be plotted on the same axis and should have similar data range.
@return (str) path and name of the generated graphics file.
empty string if an error occurred.
"""
@ -258,7 +273,7 @@ def render_scan(filename, data=None):
try:
if len(scan_mode) == 1:
out_filename = render_1d_scan(filename, data, scan_mode, is_modf=is_modf)
out_filename = render_1d_scan(filename, data, scan_mode, is_modf=is_modf, ref_data=ref_data)
elif len(scan_mode) == 2 and 'e' in scan_mode:
out_filename = render_ea_scan(filename, data, scan_mode, is_modf=is_modf)
elif len(scan_mode) == 2 and 't' in scan_mode and 'p' in scan_mode:

View File

@ -344,7 +344,6 @@ class SingleModelHandler(ModelHandler):
modf_ext = ".modf" + parent_task.file_ext
parent_task.modf_filename = parent_task.file_root + modf_ext
assert not math.isnan(task.rfac)
self.result = task.model.copy()
self.result['_rfac'] = task.rfac
@ -629,7 +628,8 @@ class SymmetryHandler(TaskHandler):
self._project.evaluate_result(parent_task, child_tasks)
self._project.files.add_file(parent_task.result_filename, parent_task.id.model, 'scan')
self._project.files.add_file(parent_task.modf_filename, parent_task.id.model, 'scan')
graph_file = mgs.render_scan(parent_task.modf_filename)
graph_file = mgs.render_scan(parent_task.modf_filename,
ref_data=self._project.scans[parent_task.id.scan].modulation)
self._project.files.add_file(graph_file, parent_task.id.model, 'scan')
del self._pending_ids_per_parent[parent_task.id]
@ -752,7 +752,8 @@ class EmitterHandler(TaskHandler):
self._project.evaluate_result(parent_task, child_tasks)
self._project.files.add_file(parent_task.result_filename, parent_task.id.model, 'symmetry')
self._project.files.add_file(parent_task.modf_filename, parent_task.id.model, 'symmetry')
graph_file = mgs.render_scan(parent_task.modf_filename)
graph_file = mgs.render_scan(parent_task.modf_filename,
ref_data=self._project.scans[parent_task.id.scan].modulation)
self._project.files.add_file(graph_file, parent_task.id.model, 'symmetry')
del self._pending_ids_per_parent[parent_task.id]

143
pmsco/igor.py Normal file
View File

@ -0,0 +1,143 @@
"""
@package pmsco.igor
data exchange with wavemetrics igor pro.
this module provides functions for loading/saving pmsco data in igor pro.
@author Matthias Muntwiler
@copyright (c) 2019 by Paul Scherrer Institut @n
Licensed under the Apache License, Version 2.0 (the "License"); @n
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
from pmsco.compat import open
def _escape_igor_string(s):
s = s.replace('\\', '\\\\')
s = s.replace('"', '\\"')
return s
def namefix_double(name):
    """
    fix a 1-character wave name by doubling it.

    a name of length 1 is replaced by the name repeated twice, e.g. 'e' becomes 'ee'.
    names of any other length (including the empty string) are returned unchanged.

    @param name: (str) proposed wave name
    @return: corrected name
    """
    if len(name) != 1:
        return name
    return name + name
def namefix_etpais(name):
    """
    fix a 1-character wave name according to the ETPAIS scheme.

    the known 1-character column names are replaced by 2-character names ('e' by 'en' etc.).
    all other names are returned unchanged.

    @param name: (str) proposed wave name
    @return: corrected name
    """
    # NOTE(review): there is no entry for 'a' although the scheme is called ETPAIS - confirm that this is intended.
    replacements = {'e': 'en', 't': 'th', 'p': 'ph', 'i': 'in', 'm': 'mo', 's': 'si'}
    return replacements.get(name, name)
class IgorExport(object):
    """
    class exports pmsco data to an Igor text (ITX) file.

    usage:
    1) create an object instance.
    2) set @ref data.
    3) set optional attributes: @ref prefix and @ref namefix.
    4) call @ref export.
    """

    def __init__(self):
        super(IgorExport, self).__init__()
        # one-dimensional structured numpy array to export, see set_data.
        self.data = None
        # string that is prepended to every wave name.
        self.prefix = ""
        # callable that maps a column name to a wave name. None leaves the column names unchanged.
        self.namefix = namefix_double

    def set_data(self, data):
        """
        set the data array to export.

        this must (currently) be a one-dimensional structured array.
        the column names will become wave names.

        @param data: numpy.ndarray
        @return: None
        """
        self.data = data

    def export(self, filename):
        """
        write to igor file.

        the data is checked before the file is opened,
        so that invalid data does not leave a truncated file behind.

        @param filename: path and name of the file to write. an existing file is overwritten.
        @return: None
        @raise TypeError if @ref data is not a numpy array.
        @raise ValueError if @ref data is not a one-dimensional structured array.
        """
        self._check_data()
        with open(filename, 'w') as f:
            self._write_header(f)
            self._write_data(f)

    def _check_data(self):
        """
        check that the data array can be exported.

        explicit exceptions are used instead of assert statements
        because assertions are removed when python runs with the -O option.

        @return: None
        @raise TypeError if @ref data is not a numpy array.
        @raise ValueError if @ref data is not a one-dimensional structured array.
        """
        data = self.data
        if not isinstance(data, np.ndarray):
            raise TypeError("IgorExport.data must be a numpy.ndarray, not {0}".format(type(data).__name__))
        if data.ndim != 1:
            raise ValueError("IgorExport.data must be one-dimensional, not {0}-dimensional".format(data.ndim))
        if not data.dtype.names:
            raise ValueError("IgorExport.data must be a structured array with named columns")

    def _fix_name(self, name):
        """
        fix a wave name.

        this function first applies @ref namefix (if it is not None) and then prepends @ref prefix.

        @param name: (str) proposed wave name
        @return: corrected name
        """
        if self.namefix is not None:
            name = self.namefix(name)
        return self.prefix + name

    def _write_header(self, f):
        """
        write the header of the igor text file

        @param f: open file or stream
        @return: None
        """
        f.write('IGOR' + '\n')
        f.write('X // pmsco data export\n')

    def _write_data(self, f):
        """
        write a data section to the igor text file.

        the section consists of a Waves line that declares one wave per column,
        followed by the rows of the array between BEGIN and END.

        @param f: open file or stream
        @return: None
        @raise TypeError if @ref data is not a numpy array.
        @raise ValueError if @ref data is not a one-dimensional structured array.
        """
        # repeated here because this method may be called without going through export.
        self._check_data()

        arr = self.data
        shape = ",".join(map(str, arr.shape))
        names = ", ".join([self._fix_name(name) for name in arr.dtype.names])
        f.write('Waves/O/D/N=({shape}) {names}\n'.format(shape=shape, names=names))
        f.write('BEGIN\n')
        # a single format specifier is applied to every column of the structured array.
        np.savetxt(f, arr, fmt='%g')
        f.write('END\n')

View File

@ -1,15 +1,16 @@
SHELL=/bin/sh
# makefile for EDAC, MSC, and MUFPOT programs and modules
# makefile for external programs and modules
#
# see the top-level makefile for additional information.
.PHONY: all clean edac loess msc mufpot
.PHONY: all clean edac loess msc mufpot phagen
EDAC_DIR = edac
MSC_DIR = msc
MUFPOT_DIR = mufpot
LOESS_DIR = loess
PHAGEN_DIR = calculators/phagen
all: edac loess
@ -25,9 +26,13 @@ msc:
mufpot:
$(MAKE) -C $(MUFPOT_DIR)
phagen:
$(MAKE) -C $(PHAGEN_DIR)
clean:
$(MAKE) -C $(EDAC_DIR) clean
$(MAKE) -C $(LOESS_DIR) clean
$(MAKE) -C $(MSC_DIR) clean
$(MAKE) -C $(MUFPOT_DIR) clean
$(MAKE) -C $(PHAGEN_DIR) clean
rm -f *.pyc

View File

@ -344,9 +344,14 @@ class GridSearchHandler(handlers.ModelHandler):
time_pending += self._model_time
if time_pending > time_avail:
self._timeout = True
logger.warning("time limit reached")
if self._invalid_count > self._invalid_limit:
self._timeout = True
logger.error("number of invalid calculations (%u) exceeds limit", self._invalid_count)
model = self._next_model
if not self._timeout and model < self._pop.model_count and self._invalid_count < self._invalid_limit:
if not self._timeout and model < self._pop.model_count:
new_task = parent_task.copy()
new_task.parent_id = parent_id
pos = self._pop.positions[model]

View File

@ -491,9 +491,9 @@ class Population(object):
seed = np.genfromtxt(seed_file, names=True)
try:
seed = seed[seed['_rfac'] <= rfac_limit]
except KeyError:
logger.warning(BMsg("missing _rfac column in seed file {hf}. ignoring seed file.", hf=seed_file))
return 0
except ValueError:
recalc_seed = True
logger.warning(BMsg("missing _rfac column in seed file {hf}. re-calculating.", hf=seed_file))
else:
seed.sort(order='_rfac')
seed_size = min(seed.shape[0], count_limit)
@ -508,9 +508,12 @@ class Population(object):
if len(common_fields) < len(dest_fields):
logger.warning(BMsg("missing columns in seed file {hf}.", hf=seed_file))
logger.info(BMsg("seeding population with {hs} models from file {hf}.", hs=seed_size, hf=seed_file))
logger.warning(BMsg("seeding population with {hs} models from file {hf}.", hs=seed_size, hf=seed_file))
self.pos['_rfac'][first:last] = seed['_rfac']
try:
self.pos['_rfac'][first:last] = seed['_rfac']
except ValueError:
self.pos['_rfac'][first:last] = 2.1
dest_index = np.arange(first, last)
for name in common_fields:
sel1 = np.less_equal(self.model_min[name], seed[name])
@ -525,11 +528,11 @@ class Population(object):
self.pos['_model'][first:last] = np.arange(seed_size) + first
if recalc_seed:
self.pos['_rfac'][first:last] = 2.1
logger.info("models from seed file are re-calculated.")
logger.warning("models from seed file are re-calculated.")
else:
sel = self.pos['_rfac'][first:last] <= rfac_limit
self.pos['_gen'][dest_index[sel]] = -1
logger.info(BMsg("{0} models from seed file are not re-calculated.", np.sum(sel)))
logger.warning(BMsg("{0} models from seed file are not re-calculated.", np.sum(sel)))
return seed_size
@ -579,7 +582,7 @@ class Population(object):
if len(common_fields) < len(dest_fields):
logger.warning(BMsg("loaded patch file {pf}. some columns are missing.", pf=patch_file))
else:
logger.info(BMsg("loaded patch file {pf}.", pf=patch_file))
logger.warning(BMsg("loaded patch file {pf}.", pf=patch_file))
def _apply_patch(self):
"""
@ -592,7 +595,7 @@ class Population(object):
parameter values that lie outside the parameter domain (min/max) are ignored.
"""
if self.pos_patch is not None:
logger.info(BMsg("patching the population with new positions."))
logger.warning(BMsg("patching generation {gen} with new positions.", gen=self.generation))
source_fields = set(self.pos_patch.dtype.names)
dest_fields = set(self.model_start.keys())
common_fields = source_fields & dest_fields
@ -996,10 +999,10 @@ class Population(object):
# rewrite model, tolerance and results as two-dimensional array
if search_array is None:
results = self.results[names].copy()
else:
results = search_array[names].copy()
results = results.view((results.dtype[0], len(names)))
search_array = self.results
results = np.empty((search_array.shape[0], len(names)))
for col, name in enumerate(names):
results[:, col] = search_array[name]
model = np.asarray(model_tuple, results.dtype)
tol = np.asarray([max(abs(self.model_max[name]), abs(self.model_min[name]), precision)
for name in names])
@ -1169,8 +1172,9 @@ class PopulationHandler(handlers.ModelHandler):
"""
initialize the particle swarm and open an output file.
the population size is set to project.optimizer_params.['pop_size'] if it is defined and greater than 4.
otherwise, it defaults to <code>max(2 * slots, 4)</code>.
the population size is set to `project.optimizer_params['pop_size']`
if it is defined and greater than 4.
otherwise, it defaults to `max(slots, 4)`.
for good efficiency the population size (number of particles) should be
greater or equal to the number of available processing slots,
@ -1191,7 +1195,9 @@ class PopulationHandler(handlers.ModelHandler):
super(PopulationHandler, self).setup(project, slots)
_min_size = 4
self._pop_size = max(project.optimizer_params.get('pop_size', self._slots * 2), _min_size)
_def_size = self._slots
_req_size = project.optimizer_params.get('pop_size', 0)
self._pop_size = _req_size if _req_size >= _min_size else _def_size
self.setup_population()
self._invalid_limit = self._pop_size * 10
@ -1228,7 +1234,11 @@ class PopulationHandler(handlers.ModelHandler):
because the best peer position in the generation may not be known yet.
the effect can be reduced by making the population larger than the number of processes.
@return list of generated tasks. empty list if the optimization has converged (see Population.is_converged()).
the created tasks are returned as the function result and added to self._pending_tasks.
@return list of generated tasks.
empty list if the optimization has converged (see Population.is_converged())
or if the time limit is approaching.
"""
super(PopulationHandler, self).create_tasks(parent_task)
@ -1241,7 +1251,7 @@ class PopulationHandler(handlers.ModelHandler):
time_pending = self._model_time * len(self._pending_tasks)
time_avail = (self.datetime_limit - datetime.datetime.now()) * max(self._slots, 1)
out_tasks = []
new_tasks = []
if not self._timeout and not self._converged:
self._check_patch_file()
self._pop.advance_population()
@ -1250,7 +1260,8 @@ class PopulationHandler(handlers.ModelHandler):
time_pending += self._model_time
if time_pending > time_avail:
self._timeout = True
logger.info("time limit reached")
logger.warning("time limit reached")
new_tasks = []
break
if pos['_gen'] >= 0:
@ -1258,12 +1269,12 @@ class PopulationHandler(handlers.ModelHandler):
new_task.parent_id = parent_id
new_task.model = pos
new_task.change_id(model=pos['_model'])
new_tasks.append(new_task)
child_id = new_task.id
self._pending_tasks[child_id] = new_task
out_tasks.append(new_task)
for task in new_tasks:
self._pending_tasks[task.id] = task
return out_tasks
return new_tasks
def _check_patch_file(self):
"""
@ -1323,7 +1334,7 @@ class PopulationHandler(handlers.ModelHandler):
if task.result_valid:
if self._pop.is_converged() and not self._converged:
logger.info("population converged")
logger.warning("population converged")
self._converged = True
if task.time > self._model_time:

View File

@ -46,8 +46,6 @@ import sys
from mpi4py import MPI
import pmsco.calculators.calculator as calculator
import pmsco.cluster as cluster
import pmsco.dispatch as dispatch
import pmsco.files as files
import pmsco.handlers as handlers
@ -154,12 +152,6 @@ def set_common_args(project, args):
if args.table_file:
project.optimizer_params['table_file'] = args.table_file
code = args.code.lower()
if code in {'edac', 'msc', 'test'}:
project.code = code
else:
logger.error("invalid code argument")
if args.time_limit:
project.set_timedelta_limit(datetime.timedelta(hours=args.time_limit))
@ -178,38 +170,6 @@ def set_common_args(project, args):
project.keep_best = args.keep_best
def log_project_args(project):
"""
send some common project arguments to the log.
@param project: project instance (sub-class of pmsco.project.Project).
@return: None
"""
try:
logger.info("scattering code: {0}".format(project.code))
logger.info("optimization mode: {0}".format(project.mode))
try:
logger.info("minimum population size: {0}".format(project.optimizer_params['pop_size']))
except KeyError:
pass
try:
logger.info("seed file: {0}".format(project.optimizer_params['seed_file']))
logger.info("seed limit: {0}".format(project.optimizer_params['seed_limit']))
except KeyError:
pass
try:
logger.info("table file: {0}".format(project.optimizer_params['table_file']))
except KeyError:
pass
logger.info("data directory: {0}".format(project.data_dir))
logger.info("output file: {0}".format(project.output_file))
_files_to_keep = files.FILE_CATEGORIES - project.files.categories_to_delete
logger.info("intermediate files to keep: {0}".format(", ".join(_files_to_keep)))
except AttributeError:
logger.warning("AttributeError in log_project_args")
def run_project(project):
"""
run a calculation project.
@ -217,7 +177,11 @@ def run_project(project):
@param project:
@return:
"""
log_project_args(project)
# log project arguments only in rank 0
mpi_comm = MPI.COMM_WORLD
mpi_rank = mpi_comm.Get_rank()
if mpi_rank == 0:
project.log_project_args()
optimizer_class = None
if project.mode == 'single':
@ -240,26 +204,7 @@ def run_project(project):
project.handler_classes['region'] = handlers.choose_region_handler_class(project)
calculator_class = None
if project.code == 'edac':
logger.debug("importing EDAC interface")
from pmsco.calculators import edac
project.cluster_format = cluster.FMT_EDAC
calculator_class = edac.EdacCalculator
elif project.code == 'msc':
logger.debug("importing MSC interface")
from pmsco.calculators import msc
project.cluster_format = cluster.FMT_MSC
calculator_class = msc.MscCalculator
elif project.code == 'test':
logger.debug("importing TEST interface")
project.cluster_format = cluster.FMT_EDAC
calculator_class = calculator.TestCalculator
else:
logger.error("invalid code argument")
project.calculator_class = calculator_class
if project and optimizer_class and calculator_class:
if project and optimizer_class:
logger.info("starting calculations")
try:
dispatch.run_calculations(project)
@ -286,7 +231,7 @@ class Args(object):
values as the command line parser.
"""
def __init__(self, mode="single", code="edac", output_file="pmsco_data"):
def __init__(self, mode="single", output_file="pmsco_data"):
"""
constructor.
@ -299,8 +244,7 @@ class Args(object):
self.pop_size = 0
self.seed_file = ""
self.seed_limit = 0
self.code = code
self.data_dir = os.getcwd()
self.data_dir = ""
self.output_file = output_file
self.time_limit = 24.0
self.keep_files = files.FILE_CATEGORIES_TO_KEEP
@ -319,6 +263,7 @@ def get_cli_parser(default_args=None):
KEEP_FILES_CHOICES = files.FILE_CATEGORIES | {'all'}
parser = argparse.ArgumentParser(
formatter_class=argparse.ArgumentDefaultsHelpFormatter,
description="""
multiple-scattering calculations and optimization
@ -349,7 +294,7 @@ def get_cli_parser(default_args=None):
help='calculation mode')
parser.add_argument('--pop-size', type=int, default=default_args.pop_size,
help='population size (number of particles) in swarm or genetic optimization mode. ' +
'default is the greater of 4 or two times the number of calculation processes.')
'default is the greater of 4 or the number of calculation processes.')
parser.add_argument('--seed-file',
help='path and name of population seed file. ' +
'population data of previous optimizations can be used to seed a new optimization. ' +
@ -357,14 +302,11 @@ def get_cli_parser(default_args=None):
parser.add_argument('--seed-limit', type=int, default=default_args.seed_limit,
help='maximum number of models to use from the seed file. ' +
'the models with the best R-factors are selected.')
parser.add_argument('-c', '--code', choices=['msc', 'edac', 'test'], default=default_args.code,
help='scattering code (default: edac)')
parser.add_argument('-d', '--data-dir', default=default_args.data_dir,
help='directory path for experimental data files (if required by project). ' +
'default: working directory')
parser.add_argument('-o', '--output-file', default=default_args.output_file,
help='base path for intermediate and output files.' +
'default: pmsco_data')
help='base path for intermediate and output files.')
parser.add_argument('--table-file',
help='path and name of population table file for table optimization mode. ' +
'the file must have the same structure as the .pop or .dat files.')
@ -375,21 +317,18 @@ def get_cli_parser(default_args=None):
'of a limited number of best models are kept.')
parser.add_argument('--keep-best', type=int, default=default_args.keep_best,
help='number of best models for which to keep result files '
'(at each node from root down to keep-levels). '
'default 10 (project can define higher default).')
'(at each node from root down to keep-levels).')
parser.add_argument('--keep-levels', type=int, choices=range(5),
default=default_args.keep_levels,
help='task level down to which result files of best models are kept. '
'0 = model, 1 = scan, 2 = symmetry, 3 = emitter, 4 = region. '
'default 1 (project can define higher default).')
'0 = model, 1 = scan, 2 = symmetry, 3 = emitter, 4 = region.')
parser.add_argument('-t', '--time-limit', type=float, default=default_args.time_limit,
help='wall time limit in hours. the optimizers try to finish before the limit. default: 24.')
help='wall time limit in hours. the optimizers try to finish before the limit.')
parser.add_argument('--log-file', default=default_args.log_file,
help='name of the main log file. ' +
'under MPI, the rank of the process is inserted before the extension. ' +
'defaults: output file + log, or pmsco.log.')
'under MPI, the rank of the process is inserted before the extension.')
parser.add_argument('--log-level', default=default_args.log_level,
help='minimum level of log messages. DEBUG, INFO, WARNING, ERROR, CRITICAL. default: WARNING.')
help='minimum level of log messages. DEBUG, INFO, WARNING, ERROR, CRITICAL.')
feature_parser = parser.add_mutually_exclusive_group(required=False)
feature_parser.add_argument('--log-enable', dest='log_enable', action="store_true",
help="enable logging. by default, logging is on.")

View File

@ -33,12 +33,15 @@ from __future__ import print_function
import collections
import copy
import datetime
import git
import logging
import numpy as np
import os.path
import socket
import sys
from pmsco.calculators.calculator import InternalAtomicCalculator
from pmsco.calculators.edac import EdacCalculator
import pmsco.cluster as mc
from pmsco.compat import open
import pmsco.data as md
@ -177,32 +180,89 @@ class Params(object):
# @arg emission angle window (EDAC)
# @arg angular_broadening (MSC)
## @var binding_energy (float)
# initial state binding energy with respect to the Fermi level in eV
#
## @var initial_state (str)
# initial state
#
# 1s, 2p, 2p1/2, etc.
#
## @var phase_files (dict)
# dictionary of phase files.
# dictionary of phase or scattering matrix element files.
#
# the keys are atomic numbers, the values file names.
# if the dictionary is empty or the files don't exist, the phases are computed internally (EDAC only).
# whether the files contain phase shifts or matrix elements depends on the calculator.
# EDAC determines the kind of information from the first line in the file.
#
# if the dictionary is empty or the files don't exist,
# the scattering matrix is computed by the calculator (if supported).
#
# maps to:
# @arg scatterer (EDAC)
# @arg atomic_number, phase_file (MSC)
## @var phase_output_classes (int or iterable of int)
# atom classes for which to output phase files
#
# if the atomic scattering factors are calculated internally,
# EDAC can export them to scattering files.
#
# this parameter can be one of
# @arg None (default) no phase output,
# @arg integer number defining a range 0:N-1 of atom classes,
# @arg iterable (e.g., set or sequence) of atom classes to export.
#
# the problem is that EDAC expects the user to list each atom class to export,
# though it is not possible to know how many classes there will be
# or which atoms belong to which class before the calculation is actually done.
# the number of classes will be between the number of different elements and the number of atoms.
#
# thus, this parameter should normally be left at its default value
# and used only in specific situations that can be processed manually.
# if the parameter is non-default, EDAC will also produce a cluster output
# that includes a mapping between atomic coordinates and atom classes.
#
# @note the files generated belong to the category "output".
# you need to specify `--keep-files output` to prevent them from getting cleaned up.
## @var polarization (str)
# photon polarization
#
# 'H', 'V', 'L', 'R', 'U'
#
## @var rme_files (dict)
# dictionary of radial matrix element files.
#
# if the dictionary is empty or the files don't exist,
# the radial matrix defaults to the rme_xxx_xxx attributes.
#
# in EDAC, RME files or constants are considered only if @ref phase_files are specified.
#
## @var work_function (float)
# work function in eV
#
# the energy scale of EDAC is referenced to the vacuum level
# but data files are referenced to the Fermi level.
# the @ref pmsco.calculators.edac module adds the work function to the kinetic energy before it calls EDAC.
#
def __init__(self):
self.title = "default parameters"
self.comment = "set by project.Params()"
self.cluster_file = ""
self.output_file = ""
self.scan_file = ""
# EDAC convention: 1s, 2p, 2p1/2, etc.
self.initial_state = "1s"
# MSC convention: H, V, L, R, U
self.binding_energy = 0.0
self.polarization = "H"
self.angular_resolution = 1.0
self.z_surface = 0.0
self.inner_potential = 10.0
# the energy scale of EDAC is referenced to the vacuum level
# but data files are referenced to the Fermi level
# the msc_edac module adds the work function to the kinetic energy before it calls EDAC
self.work_function = 0.0
self.symmetry_range = 360.0
self.polar_incidence_angle = 60.0
@ -211,6 +271,11 @@ class Params(object):
self.debye_temperature = 400.0
self.debye_wavevector = 1.0
self.phase_files = {}
self.rme_files = {}
self.rme_minus_value = 0.1
self.rme_minus_shift = 0.0
self.rme_plus_value = 1.0
self.rme_plus_shift = 0.0
# used by MSC only
self.spherical_order = 2
self.scattering_level = 5
@ -221,15 +286,23 @@ class Params(object):
self.planewave_attenuation = 1.0
self.vibration_model = "N"
self.substrate_atomic_mass = 1.0
self.rme_minus_value = 0.5
self.rme_minus_shift = 0.0
self.rme_plus_value = 0.5
self.rme_plus_shift = 0.0
# used by EDAC only
self.emitters = [(0.0, 0.0, 0.0, 0)]
self.lmax = 15
self.dmax = 5.0
self.orders = [20]
self.phase_output_classes = None
@property
def l_init(self):
    """
    initial state l quantum number.

    the value is derived from the second character of the initial_state attribute
    by looking up its position in the sequence s, p, d, f.

    @return: (int) 0..3
    @raise ValueError if the second character of initial_state is not one of 's', 'p', 'd', 'f'.
    @raise IndexError if initial_state has fewer than two characters.
    """
    shell_letter = self.initial_state[1]
    return "spdf".index(shell_letter)
class Scan(object):
@ -301,11 +374,45 @@ class Scan(object):
self.mode = []
self.emitter = ""
self.initial_state = "1s"
self.energies = np.zeros((0))
self.thetas = np.zeros((0))
self.phis = np.zeros((0))
self.alphas = np.zeros((0))
self.positions = {
'e': np.empty(0),
't': np.empty(0),
'p': np.empty(0),
'a': np.empty(0),
}
# energy positions of the scan. read access returns the 'e' entry of self.positions.
@property
def energies(self):
return self.positions['e']
# write access replaces the 'e' entry of self.positions with the assigned value.
@energies.setter
def energies(self, value):
self.positions['e'] = value
# theta positions of the scan. read access returns the 't' entry of self.positions.
@property
def thetas(self):
return self.positions['t']
# write access replaces the 't' entry of self.positions with the assigned value.
@thetas.setter
def thetas(self, value):
self.positions['t'] = value
# phi positions of the scan. read access returns the 'p' entry of self.positions.
@property
def phis(self):
return self.positions['p']
# write access replaces the 'p' entry of self.positions with the assigned value.
@phis.setter
def phis(self, value):
self.positions['p'] = value
# alpha positions of the scan. read access returns the 'a' entry of self.positions.
@property
def alphas(self):
return self.positions['a']
# write access replaces the 'a' entry of self.positions with the assigned value.
@alphas.setter
def alphas(self, value):
self.positions['a'] = value
def copy(self):
"""
create a copy of the scan.
@ -314,9 +421,9 @@ class Scan(object):
"""
return copy.deepcopy(self)
def set_scan(self, filename, emitter, initial_state):
def import_scan_file(self, filename, emitter, initial_state):
"""
set file name of reference experiment and load it.
import the reference experiment.
the extension must be one of msc_data.DATATYPES (case insensitive)
corresponding to the meaning of the columns in the file.
@ -324,9 +431,8 @@ class Scan(object):
this method does not calculate the modulation function.
@attention EDAC can only calculate equidistant, rectangular scans.
this version introduces holo scans as an experimental feature.
for all other scan types, the scan file must exactly conform with a rectangular scan.
the following scans are currently supported:
holo scans are transparently mapped to rectangular scans by pmsco.
this method accepts the following scans:
* intensity vs energy at fixed theta, phi
* intensity vs analyser angle vs energy at normal emission (theta = 0, constant phi)
@ -347,43 +453,120 @@ class Scan(object):
if self.filename:
self.raw_data = md.load_data(self.filename)
self.dtype = self.raw_data.dtype
self.mode, positions = md.detect_scan_mode(self.raw_data)
self.mode, self.positions = md.detect_scan_mode(self.raw_data)
if 'e' in self.mode:
self.energies = positions['e']
else:
if 'e' not in self.mode:
try:
self.energies = np.asarray((self.raw_data['e'][0], ))
except ValueError:
logger.error("missing energy in scan file %s", self.filename)
raise
if 't' in self.mode:
self.thetas = positions['t']
else:
if 't' not in self.mode:
try:
self.thetas = np.asarray((self.raw_data['t'][0], ))
except ValueError:
logger.info("missing theta in scan file %s, defaulting to 0.0", self.filename)
self.thetas = np.zeros((1))
self.thetas = np.zeros(1)
if 'p' in self.mode:
self.phis = positions['p']
else:
if 'p' not in self.mode:
try:
self.phis = np.asarray((self.raw_data['p'][0], ))
except ValueError:
logger.info("missing phi in scan file %s, defaulting to 0.0", self.filename)
self.phis = np.zeros((1))
self.phis = np.zeros(1)
if 'a' in self.mode:
self.alphas = positions['a']
else:
if 'a' not in self.mode:
try:
self.alphas = np.asarray((self.raw_data['a'][0], ))
except ValueError:
logger.info("missing alpha in scan file %s, defaulting to 0.0", self.filename)
self.alphas = np.zeros((1))
self.alphas = np.zeros(1)
def define_scan(self, positions, emitter, initial_state):
"""
define a cartesian (rectangular/grid) scan.
this method initializes the scan with a one- or two-dimensional cartesian scan
of the four possible scan dimensions.
the scan range is given as arguments, the intensity values are initialized as 1.
the file name and modulation functions are reset to empty and None, respectively.
the method can create the following scan schemes:
* intensity vs energy at fixed theta, phi
* intensity vs analyser angle vs energy at normal emission (theta = 0, constant phi)
* intensity vs theta, phi, or alpha
* intensity vs theta and phi (rectangular holo scan)
@param positions: (dictionary of numpy arrays)
the dictionary must contain a one-dimensional array for each scan dimension 'e', 't', 'p' and 'a'.
these arrays must contain unique, equidistant positions.
constant dimensions must contain exactly one value.
missing angle dimensions default to 0,
a missing energy dimension results in a KeyError.
@param emitter: (string) chemical symbol of the photo-emitting atom, e.g. "Cu".
@param initial_state: (string) nl term of the initial state of the atom, e.g. "2p".
@raise KeyError if the positions dictionary does not contain an 'e' item.
@raise AssertionError if the number of dimensions having more than one position is not 1 or 2,
or if both 't' and 'a' have more than one position.
"""
# NOTE(review): the docstring states that the modulation function is reset to None,
# but no statement in this method assigns it - confirm against the rest of the class.
self.filename = ""
self.emitter = emitter
self.initial_state = initial_state
# mode collects the dimensions that have more than one position.
self.mode = []
# shape accumulates the number of rows of the raw_data array.
shape = 1
# the energy dimension is mandatory: the KeyError is logged and re-raised.
try:
self.energies = np.copy(positions['e'])
except KeyError:
logger.error("missing energy in define_scan arguments")
raise
else:
if self.energies.shape[0] > 1:
self.mode.append('e')
shape *= self.energies.shape[0]
# the angle dimensions are optional: a missing item is replaced by a single position 0.
try:
self.thetas = np.copy(positions['t'])
except KeyError:
logger.info("missing theta in define_scan arguments, defaulting to 0.0")
self.thetas = np.zeros(1)
else:
if self.thetas.shape[0] > 1:
self.mode.append('t')
shape *= self.thetas.shape[0]
try:
self.phis = np.copy(positions['p'])
except KeyError:
logger.info("missing phi in define_scan arguments, defaulting to 0.0")
self.phis = np.zeros(1)
else:
if self.phis.shape[0] > 1:
self.mode.append('p')
shape *= self.phis.shape[0]
try:
self.alphas = np.copy(positions['a'])
except KeyError:
logger.info("missing alpha in define_scan arguments, defaulting to 0.0")
self.alphas = np.zeros(1)
else:
if self.alphas.shape[0] > 1:
self.mode.append('a')
shape *= self.alphas.shape[0]
assert 0 < len(self.mode) <= 2, "unacceptable number of dimensions in define_scan"
assert not ('t' in self.mode and 'a' in self.mode), "unacceptable combination of dimensions in define_scan"
self.dtype = md.DTYPE_ETPAI
self.raw_data = np.zeros(shape, self.dtype)
# expand the four position arrays to a full grid and flatten each coordinate into its column.
# np.meshgrid is called with its default indexing,
# all four columns are flattened in the same way so that each row is one grid point.
dimensions = [self.positions[dim] for dim in ['e', 't', 'p', 'a']]
grid = np.meshgrid(*dimensions)
for i, dim in enumerate(['e', 't', 'p', 'a']):
self.raw_data[dim] = grid[i].reshape(-1)
# the intensity column is initialized to a constant 1.
self.raw_data['i'] = 1
# noinspection PyMethodMayBeStatic
@ -465,9 +648,8 @@ class Project(object):
#
# @arg @c 'pop_size' (int)
# population size (number of particles) in the swarm or genetic optimization mode.
# by default, the ParticleSwarmHandler chooses the population size depending on the number of parallel processes.
# by default, the population size is set to the number of parallel processes or 4, whichever is greater.
# you may want to override the default value in cases where the automatic choice is not appropriate.
# the value can be set by the command line.
# @arg @c 'seed_file' (string)
# name of a file containing the results from previous optimization runs.
# this can be used to resume a swarm or genetic optimization where it was interrupted before.
@ -537,9 +719,27 @@ class Project(object):
# @arg 3 = emitter level: emitter nodes in addition to level 1.
# @arg 4 = region level: region nodes in addition to level 1.
## @var atomic_scattering_factory
# factory function to create an atomic scattering calculator
#
# this can also be the name of a class.
# the calculator must inherit from pmsco.calculators.calculator.AtomicCalculator.
# the name of atomic scattering calculator classes should end in AtomicCalculator.
## @var multiple_scattering_factory
# factory function to create a multiple scattering calculator
#
# this can also be the name of a class.
# the calculator must inherit from pmsco.calculators.calculator.Calculator
#
# example: pmsco.calculators.edac.EdacCalculator
#
def __init__(self):
self.mode = "single"
self.code = "edac"
self.job_name = ""
self.git_hash = ""
self.description = ""
self.features = {}
self.cluster_format = mc.FMT_EDAC
self.cluster_generator = mc.LegacyClusterGenerator(self)
@ -568,7 +768,8 @@ class Project(object):
'emit': handlers.EmitterHandler,
'region': handlers.SingleRegionHandler
}
self.calculator_class = None
self.atomic_scattering_factory = InternalAtomicCalculator
self.multiple_scattering_factory = EdacCalculator
self._tasks_fields = []
self._db = database.ResultsDatabase()
@ -608,7 +809,7 @@ class Project(object):
self.combined_scan = None
self.combined_modf = None
def add_scan(self, filename, emitter, initial_state, is_modf=False, modf_model=None):
def add_scan(self, filename, emitter, initial_state, is_modf=False, modf_model=None, positions=None):
"""
add the file name of reference experiment and load it.
@ -627,6 +828,15 @@ class Project(object):
it also updates @c combined_scan and @c combined_modf which may be used as R-factor comparison targets.
@param filename: (string) file name of the experimental data, possibly including a path.
the file is not loaded when the optional positions argument is present,
but the filename may serve as basename for output files (e.g. modulation function).
@param positions: (optional, dictionary of numpy arrays) scan positions.
if specified, the file given by filename is _not_ loaded,
and the scan positions are initialized from this dictionary.
the dictionary keys are the possible scan dimensions: 'e', 't', 'p', 'a'.
the arrays are one-dimensional and contain unique, equidistant positions.
constant dimensions have shape 1. see @ref Scan.define_scan.
@param emitter: (string) chemical symbol of the photo-emitting atom, e.g. "Cu".
@ -638,11 +848,13 @@ class Project(object):
@param modf_model: (dict) model parameters to be passed to the modulation function.
@return (Scan) the new scan object (which is also a member of self.scans).
@todo the accepted scanning schemes should be generalized.
"""
scan = Scan()
scan.set_scan(filename, emitter, initial_state)
if positions is not None:
scan.define_scan(positions, emitter, initial_state)
scan.filename = filename
else:
scan.import_scan_file(filename, emitter, initial_state)
self.scans.append(scan)
if modf_model is None:
@ -735,6 +947,41 @@ class Project(object):
"""
self.timedelta_limit = timedelta
def log_project_args(self):
    """
    send some common project attributes to the log.

    the attributes are normally logged at WARNING level.
    entries of optimizer_params that evaluate to False are logged at DEBUG level only.

    this method is called by the main pmsco module after creating the project and assigning command line arguments.
    it may be overridden to add logs of attributes of the sub-class.

    logging is best effort:
    if one of the attributes is missing, the AttributeError is reported in the log
    and the remaining attributes are not logged.

    @return: None
    """
    try:
        logger.warning("atomic scattering: {0}".format(self.atomic_scattering_factory))
        logger.warning("multiple scattering: {0}".format(self.multiple_scattering_factory))
        logger.warning("optimization mode: {0}".format(self.mode))

        for key in sorted(self.optimizer_params):
            val = self.optimizer_params[key]
            # unset (false) parameters are of minor interest and are demoted to debug level.
            lev = logging.WARNING if val else logging.DEBUG
            logger.log(lev, "optimizer_params['{k}']: {v}".format(k=key, v=val))

        logger.warning("data directory: {0}".format(self.data_dir))
        logger.warning("output file: {0}".format(self.output_file))

        # the difference of the two category collections has no defined order.
        # sort the names so that the log line is reproducible between runs.
        _files_to_keep = files.FILE_CATEGORIES - self.files.categories_to_delete
        logger.warning("intermediate files to keep: {0}".format(", ".join(sorted(_files_to_keep))))

        for idx, scan in enumerate(self.scans):
            logger.warning(BMsg("scan {0}: {filename} ({emitter} {initial_state})", idx, **vars(scan)))
        for idx, sym in enumerate(self.symmetries):
            logger.warning(BMsg("symmetry {0}: {sym}", idx, sym=sym))
    except AttributeError as err:
        # keep going without the remaining entries, but tell which attribute was missing.
        logger.warning("AttributeError in log_project_args: {0}".format(err))
def combine_symmetries(self, parent_task, child_tasks):
"""
combine results of different symmetry into one result and calculate the modulation function.
@ -937,6 +1184,23 @@ class Project(object):
else:
md.save_data(parent_task.modf_filename, modf)
def get_git_hash(self):
    """
    get the git commit (hash) of the running code (HEAD)

    the method looks for a git repository in the source tree of this module.
    if successful, it returns the hash string of the HEAD commit.

    the hash is stored as a descriptive attribute of the job only.
    for this reason, the method reports an empty string rather than raising an exception
    if the commit cannot be determined.

    @return: hexadecimal hash string.
        empty string if the file is not in a git repository,
        or if the HEAD of the repository does not refer to a commit.
    """
    try:
        repo = git.Repo(__file__, search_parent_directories=True)
    except (git.exc.InvalidGitRepositoryError, git.exc.NoSuchPathError):
        return ""

    try:
        return repo.head.commit.hexsha
    except ValueError:
        # HEAD points to a reference that does not exist yet,
        # e.g. in a freshly initialized repository without commits.
        return ""
def setup(self, handlers):
"""
prepare for calculations.
@ -954,11 +1218,13 @@ class Project(object):
@return: None
"""
self.git_hash = self.get_git_hash()
fields = ["rfac"]
fields.extend(dispatch.CalcID._fields)
fields.append("secs")
fields = ["_" + f for f in fields]
dom = self.create_domain()
model_fields = dom.start.keys()
model_fields = list(dom.start.keys())
model_fields.sort(key=lambda name: name.lower())
fields.extend(model_fields)
self._tasks_fields = fields
@ -968,16 +1234,16 @@ class Project(object):
outfile.write(" ".join(fields))
outfile.write("\n")
# todo : fill in the descriptive fields, change to file-database
# todo : change to file-database
self._db.connect(":memory:")
project_id = self._db.register_project(self.__class__.__name__, sys.argv[0])
job_id = self._db.register_job(project_id,
"job-name",
self.job_name,
self.mode,
socket.gethostname(),
"git-hash",
self.git_hash,
datetime.datetime.now(),
"description")
self.description)
self._db.register_params(model_fields)
self._db.create_models_view()
@ -1012,6 +1278,7 @@ class Project(object):
values_dict = {"_" + k: v for k, v in values_dict.items()}
values_dict.update(parent_task.model)
values_dict['_rfac'] = parent_task.rfac
values_dict['_secs'] = parent_task.time.total_seconds()
values_list = [values_dict[field] for field in self._tasks_fields]
with open(self.output_file + ".tasks.dat", "a") as outfile:
outfile.write(" ".join(format(value) for value in values_list) + "\n")
@ -1258,6 +1525,76 @@ class Project(object):
return _files
def before_atomic_scattering(self, task, par, clu):
    """
    project hook that is called before the atomic scattering factors are calculated.

    the hook selects the Params and Cluster objects for the atomic scattering calculation.
    they are derived from the objects that will be used in the multiple scattering calculation.

    the basic implementation passes the objects unchanged for model tasks (model index >= 0)
    and declines the calculation for the root task (reference cluster, negative model index).
    sub-classes may override the method to modify or replace the cluster.

    @param task: @ref pmsco.dispatch.CalculationTask object representing the current calculation task.
        for model index -1, an overriding method can return the global reference cluster
        (to calculate fixed scattering factors that are used for all models)
        or None if no global scattering factors should be calculated.
        the task object must not be modified.

    @param par: @ref pmsco.project.Params object holding the preliminary
        multiple scattering input parameters of the current task.
        the method may modify this instance directly.

    @param clu: @ref pmsco.cluster.Cluster object holding the preliminary
        multiple scattering cluster of the current task.
        the method may modify this instance directly.

    @return: tuple (par, clu) of input parameters and cluster
        for the calculation of the atomic scattering factors.
        these are either the original arguments or copies of them.
        (None, None) if atomic scattering factors should not be calculated.
    """
    is_model_task = task.id.model >= 0
    return (par, clu) if is_model_task else (None, None)
def after_atomic_scattering(self, task, par, clu):
    """
    project hook that is called after the atomic scattering factors are calculated.

    the hook cleans up the Params and Cluster objects from the atomic scattering calculation
    so that they can be used in the multiple scattering calculation.

    the basic implementation passes the objects unchanged for model tasks (model index >= 0)
    and returns None for the root task (negative model index).
    sub-classes may override the method and modify the cluster and/or the input parameters
    so that the desired atomic scattering factors are used.

    @param task: @ref pmsco.dispatch.CalculationTask object representing the current calculation task.
        model index -1 denotes the root task.

    @param par: @ref pmsco.project.Params object holding the preliminary
        multiple scattering input parameters of the current task.

    @param clu: @ref pmsco.cluster.Cluster object holding the preliminary
        multiple scattering cluster of the current task.
        an overriding method should work on a copy rather than modify this object.

    @return: tuple (par, clu) of input parameters and cluster.
        these are either the original arguments or copies of them.
        (None, None) for the root task.
    """
    is_model_task = task.id.model >= 0
    return (par, clu) if is_model_task else (None, None)
def cleanup(self):
"""
delete unwanted files at the end of a project.

View File

@ -1,12 +1,10 @@
#!/usr/bin/env python2
"""
@package pmsco.projects.fcc
scattering calculation project for the (111) surface of an arbitrary face-centered cubic crystal
@author Matthias Muntwiler, matthias.muntwiler@psi.ch
@copyright (c) 2015 by Paul Scherrer Institut @n
@copyright (c) 2015-19 by Paul Scherrer Institut @n
Licensed under the Apache License, Version 2.0 (the "License"); @n
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
@ -171,16 +169,14 @@ class FCC111Project(mp.Project):
dom.add_param('Zsurf', 1.00, 0.00, 2.00, 0.50)
return dom
def create_project(element):
def create_project():
"""
create an FCC111Project calculation project.
@param element: symbol of the chemical element of the atoms contained in the cluster.
"""
project = FCC111Project()
project.element = element
project_dir = os.path.dirname(os.path.abspath(__file__))
project.data_dir = project_dir
@ -188,9 +184,9 @@ def create_project(element):
# scan dictionary
# to select any number of scans, add their dictionary keys as scans option on the command line
project.scan_dict['default'] = {'filename': os.path.join(project_dir, "demo_holo_scan.etp"),
'emitter': "Ni", 'initial_state': "3s"}
'emitter': "Ni", 'initial_state': "3s"}
project.scan_dict['holo'] = {'filename': os.path.join(project_dir, "demo_holo_scan.etp"),
'emitter': "Ni", 'initial_state': "3s"}
'emitter': "Ni", 'initial_state': "3s"}
project.scan_dict['alpha'] = {'filename': os.path.join(project_dir, "demo_alpha_scan.etp"),
'emitter': "Ni", 'initial_state': "3s"}
@ -224,6 +220,7 @@ def set_project_args(project, project_args):
try:
if project_args.element:
project.element = project_args.element
for scan in project.scans:
scan.emitter = project_args.element
logger.warning(BMsg("override emitters to {0}", project.emitter))
@ -258,5 +255,5 @@ def parse_project_args(_args):
parser.add_argument('--energy', type=float,
help="kinetic energy of photoelectron (override scan file)")
parsed_args = parser.parse_known_args(_args)
parsed_args = parser.parse_args(_args)
return parsed_args

View File

@ -1,6 +1,6 @@
attrdict
fasteners
numpy
numpy >= 1.13
periodictable
statsmodels
mpi4py
@ -9,3 +9,5 @@ mock
scipy
matplotlib
future
swig
gitpython

View File

@ -10,7 +10,7 @@ to run the tests, change to the directory which contains the tests directory, an
@author Matthias Muntwiler, matthias.muntwiler@psi.ch
@copyright (c) 2015-17 by Paul Scherrer Institut @n
@copyright (c) 2015-19 by Paul Scherrer Institut @n
Licensed under the Apache License, Version 2.0 (the "License"); @n
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
@ -102,16 +102,18 @@ class TestClusterFunctions(unittest.TestCase):
@return: None
"""
clu = self.create_cube()
xy2 = clu.data[['x', 'y']].copy()
xy3 = xy2.view((xy2.dtype[0], len(xy2.dtype.names)))
xy3 = np.empty((clu.data.shape[0], 2), np.float32)
xy3[:, 0] = clu.data['x']
xy3[:, 1] = clu.data['y']
ctr = np.asarray((1.0, 0.0, 0.0))
dist = np.linalg.norm(xy3 - ctr[0:2], axis=1)
self.assertAlmostEqual(1.0, dist[0])
self.assertAlmostEqual(0.0, dist[1])
clu.clear()
xy2 = clu.data[['x', 'y']].copy()
xy3 = xy2.view((xy2.dtype[0], len(xy2.dtype.names)))
xy3 = np.empty((clu.data.shape[0], 2), np.float32)
xy3[:, 0] = clu.data['x']
xy3[:, 1] = clu.data['y']
ctr = np.asarray((1.0, 0.0, 0.0))
dist = np.linalg.norm(xy3 - ctr[0:2], axis=1)
self.assertEqual(0, dist.shape[0])
@ -156,7 +158,7 @@ class TestClusterFunctions(unittest.TestCase):
clu.set_emitter(idx=0)
clu.set_emitter(idx=9)
self.assertEqual(2, clu.get_emitter_count())
result = clu.get_emitters()
result = clu.get_emitters(['x', 'y', 'z', 't'])
expect = [(0., 0., 0., 1), (1., 0., 1., 10)]
self.assertEqual(expect, result)
@ -233,7 +235,7 @@ class TestClusterFunctions(unittest.TestCase):
emitter = np.array((0.0, 0.0, 0.0))
clu.add_layer(7, a1, b1, b2)
pos = clu.find_positions(pos=emitter)
self.assertEqual(len(pos), 1)
self.assertEqual(1, len(pos))
def test_add_cluster(self):
clu1 = mc.Cluster()
@ -244,15 +246,18 @@ class TestClusterFunctions(unittest.TestCase):
clu1.add_atom(5, np.asarray([0, 0, -2]), 0)
clu2 = mc.Cluster()
clu2.add_atom(3, np.asarray([-0.2, 0, 0]), 0)
clu2.add_atom(4, np.asarray([0, -0.2, 0]), 0)
clu2.add_atom(5, np.asarray([0, 0.05, -1]), 0)
clu2.add_atom(5, np.asarray([0, 0, -1.01]), 0)
clu2.add_atom(6, np.asarray([0, 0, -1.99]), 0)
clu2.add_atom(3, np.asarray([-0.2, 0, 0]), 0) # unique
clu2.add_atom(4, np.asarray([0, -0.2, 0]), 0) # unique
clu2.add_atom(5, np.asarray([0, 0.05, -1]), 0) # not unique
clu2.add_atom(5, np.asarray([0, 0, -1.09]), 0) # just within tolerance of uniqueness
clu2.add_atom(6, np.asarray([0, 0, -1.99]), 0) # not unique
clu2.add_atom(7, np.asarray([0, 0, -1.10]), 0) # just out of tolerance of uniqueness
clu1.set_rmax(1.5)
clu1.add_cluster(clu2, check_rmax=True, check_unique=True, tol=0.1)
self.assertEqual(clu1.get_atom_count(), 5+2)
self.assertEqual(5+3, clu1.get_atom_count())
self.assertEqual(7, clu1.data['t'][-1])
self.assertEqual(6, clu2.data.shape[0])
def test_find_positions(self):
clu = mc.Cluster()
@ -269,8 +274,14 @@ class TestClusterFunctions(unittest.TestCase):
clu.add_layer(7, a_N, b1, b2)
clu.add_layer(5, a_B, b1, b2)
pos = clu.find_positions(pos=emitter)
self.assertEqual(len(pos), 1)
self.assertEqual(pos[0], 206)
self.assertEqual(1, len(pos))
self.assertEqual(206, pos[0])
# position in the format returned by get_emitters
emitter = (emitter[0], emitter[1], emitter[2], 7)
pos = clu.find_positions(pos=emitter)
self.assertEqual(1, len(pos))
self.assertEqual(206, pos[0])
def test_find_index_cylinder(self):
clu = self.create_cube()
@ -278,11 +289,11 @@ class TestClusterFunctions(unittest.TestCase):
rxy = 0.5
rz = 1.0
idx = clu.find_index_cylinder(pos, rxy, rz, None)
self.assertEqual(len(idx), 2)
self.assertEqual(clu.get_atomtype(idx[0]), 8)
self.assertEqual(clu.get_atomtype(idx[1]), 20)
self.assertEqual(2, len(idx))
self.assertEqual(8, clu.get_atomtype(idx[0]))
self.assertEqual(20, clu.get_atomtype(idx[1]))
idx = clu.find_index_cylinder(pos, rxy, rz, 8)
self.assertEqual(len(idx), 1)
self.assertEqual(1, len(idx))
def test_trim_cylinder(self):
clu = mc.Cluster()
@ -296,12 +307,12 @@ class TestClusterFunctions(unittest.TestCase):
r0 = 2.3
z0 = 4.2
clu.trim_cylinder(r0, z0)
self.assertEqual(clu.data.dtype, clu.dtype)
self.assertEqual(clu.data.shape[0], 21 * 5)
self.assertEqual(clu.data[1]['i'], 2)
self.assertEqual(clu.data[1]['s'], 'N')
self.assertEqual(clu.data[1]['t'], 7)
self.assertEqual(clu.get_emitter_count(), 1)
self.assertEqual(clu.dtype, clu.data.dtype)
self.assertEqual(21 * 5, clu.data.shape[0])
self.assertEqual(2, clu.data[1]['i'])
self.assertEqual('N', clu.data[1]['s'])
self.assertEqual(7, clu.data[1]['t'])
self.assertEqual(1, clu.get_emitter_count())
n_low = np.sum(clu.data['z'] < -z0)
self.assertEqual(0, n_low)
n_high = np.sum(clu.data['z'] > z0)
@ -320,12 +331,12 @@ class TestClusterFunctions(unittest.TestCase):
clu.set_emitter(pos=v_pos)
r0 = 2.3
clu.trim_sphere(r0)
self.assertEqual(clu.data.dtype, clu.dtype)
self.assertEqual(clu.data.shape[0], 39)
self.assertEqual(clu.data[1]['i'], 2)
self.assertEqual(clu.data[1]['s'], 'N')
self.assertEqual(clu.data[1]['t'], 7)
self.assertEqual(clu.get_emitter_count(), 1)
self.assertEqual(clu.dtype, clu.data.dtype)
self.assertEqual(39, clu.data.shape[0])
self.assertEqual(2, clu.data[1]['i'])
self.assertEqual('N', clu.data[1]['s'])
self.assertEqual(7, clu.data[1]['t'])
self.assertEqual(1, clu.get_emitter_count())
n_out = np.sum(clu.data['x']**2 + clu.data['y']**2 + clu.data['z'] > r0**2)
self.assertEqual(0, n_out)
@ -355,9 +366,9 @@ class TestClusterFunctions(unittest.TestCase):
def test_trim_slab(self):
clu = self.create_cube()
clu.trim_slab('x', 0.5, 1.1)
self.assertEqual(clu.data.dtype, clu.dtype)
self.assertEqual(clu.data.shape[0], 9 * 2)
self.assertEqual(clu.get_emitter_count(), 1)
self.assertEqual(clu.dtype, clu.data.dtype)
self.assertEqual(9 * 2, clu.data.shape[0])
self.assertEqual(1, clu.get_emitter_count())
def test_save_to_file(self):
clu = self.create_cube()
@ -367,12 +378,34 @@ class TestClusterFunctions(unittest.TestCase):
clu.save_to_file(f, mc.FMT_XYZ, "qwerty", emitters_only=True)
f.seek(0)
line = f.readline()
self.assertEqual(line, b"2\n", b"line 1: " + line)
self.assertEqual(b"2\n", line, b"line 1: " + line)
line = f.readline()
self.assertEqual(line, b"qwerty\n", b"line 2: " + line)
self.assertEqual(b"qwerty\n", line, b"line 2: " + line)
line = f.readline()
self.assertRegexpMatches(line, b"H +[0.]+ +[0.]+ +[0.]+", b"line 3: " + line)
line = f.readline()
self.assertRegexpMatches(line, b"Si +[01.-]+ +[01.-]+ +[0.]+", b"line 4: " + line)
line = f.readline()
self.assertEqual(line, b"", b"end of file")
self.assertEqual(b"", line, b"end of file")
def test_update_atoms(self):
    """
    check that update_atoms copies a field between corresponding atoms of two clusters.

    both clusters are built from the same six atoms (same type and position) but in different order.
    after updating field 'c' from the second cluster,
    each atom of the first cluster is expected to carry the value of its counterpart
    rather than the value found at the same array index.
    """
    target_atoms = [
        (1, [0, 0, 0], 1),
        (3, [0, 1, 0], 0),
        (5, [-1, 0, 0], 0),
        (6, [0, -1, 0], 0),
        (2, [1, 0, 0], 0),
        (4, [0, 0, 1], 0),
    ]
    source_atoms = [
        (1, [0, 0, 0], 1),
        (5, [-1, 0, 0], 0),
        (2, [1, 0, 0], 0),
        (6, [0, -1, 0], 0),
        (3, [0, 1, 0], 0),
        (4, [0, 0, 1], 0),
    ]

    clu = mc.Cluster()
    for atom_type, xyz, flag in target_atoms:
        clu.add_atom(atom_type, np.asarray(xyz), flag)

    other = mc.Cluster()
    for atom_type, xyz, flag in source_atoms:
        other.add_atom(atom_type, np.asarray(xyz), flag)
    # values of field 'c' in the order of source_atoms
    other.data['c'] = np.asarray((1, 2, 2, 3, 3, 4))

    clu.update_atoms(other, {'c'})

    # values of field 'c' re-arranged to the order of target_atoms
    expected = np.asarray((1, 3, 2, 3, 2, 4))
    np.testing.assert_array_equal(expected, clu.data['c'])

View File

@ -143,7 +143,7 @@ class TestDatabase(unittest.TestCase):
self.db.insert_model(model5)
results = self.db.query_project_params(project_id=project1)
expected = ['parA', 'parB']
self.assertEqual(expected, sorted(results.keys()))
self.assertEqual(expected, sorted(list(results.keys())))
def test_insert_model(self):
self.setup_sample_database()

71
tests/test_grid.py Normal file
View File

@ -0,0 +1,71 @@
"""
@package tests.test_grid
unit tests for pmsco.optimizers.grid
the purpose of these tests is to help debugging the code.
to run the tests, change to the directory which contains the tests directory, and execute =nosetests=.
@pre nose must be installed (python-nose package on Debian).
@author Matthias Muntwiler, matthias.muntwiler@psi.ch
@copyright (c) 2015-19 by Paul Scherrer Institut @n
Licensed under the Apache License, Version 2.0 (the "License"); @n
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import random
import unittest
import pmsco.optimizers.grid as mo
import pmsco.project as mp
class TestPopulation(unittest.TestCase):
    """
    test case for the GridPopulation class of pmsco.optimizers.grid
    """

    @classmethod
    def setup_class(cls):
        # before any methods in this class
        pass

    @classmethod
    def teardown_class(cls):
        # teardown_class() after any methods in this class
        pass

    def setUp(self):
        # before each test method: fresh domain and empty population
        random.seed(0)

        # arguments of add_param - presumably name, start, min, max, step (to be confirmed in pmsco.project.Domain)
        param_defs = (('A', 1.5, 1.0, 2.0, 0.2),
                      ('B', 2.5, 2.0, 3.0, 0.25),
                      ('C', 3.5, 3.5, 3.5, 0.0))
        domain = mp.Domain()
        for param_def in param_defs:
            domain.add_param(*param_def)
        self.domain = domain

        # number of grid points expected for the three parameters defined above
        self.expected_popsize = 30
        self.expected_names = ('_model', '_rfac', 'A', 'B', 'C')
        self.pop = mo.GridPopulation()

    def tearDown(self):
        # after each test method
        self.pop = None

    def test_setup(self):
        """
        check the layout and the initial values of the positions array after setup.
        """
        size = self.expected_popsize
        self.pop.setup(self.domain)

        self.assertEqual(self.pop.positions.dtype.names, self.expected_names)
        self.assertEqual(self.pop.positions.shape, (size,))
        self.assertEqual(self.pop.model_count, size)

        # model numbers count up from zero
        model_numbers = np.arange(size)
        np.testing.assert_array_equal(self.pop.positions['_model'], model_numbers)
        # the R-factors are expected to be initialized to 2.1
        initial_rfacs = np.ones(size) * 2.1
        np.testing.assert_array_almost_equal(self.pop.positions['_rfac'], initial_rfacs)
if __name__ == '__main__':
unittest.main()

View File

@ -20,14 +20,71 @@ Licensed under the Apache License, Version 2.0 (the "License"); @n
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import unittest
import mock
import numpy as np
import os
import unittest
import pmsco.data as data
import pmsco.dispatch as dispatch
import pmsco.project as project
class TestScan(unittest.TestCase):
    """
    test case for @ref pmsco.project.Scan class
    """

    def test_import_scan_file(self):
        """
        import the energy-alpha scan file of the twoatom project and check the scan mode and positions.
        """
        tests_dir = os.path.dirname(os.path.abspath(__file__))
        scan_path = os.path.join(tests_dir, "..", "projects", "twoatom", "twoatom_energy_alpha.etpai")

        scan = project.Scan()
        scan.import_scan_file(scan_path, "C", "1s")

        self.assertEqual(scan.mode, ['e', 'a'])

        # scan attribute and the positions expected along that dimension
        expected_positions = (
            ("energies", np.arange(10, 1005, 5)),
            ("thetas", np.asarray([0])),
            ("phis", np.asarray([0])),
            ("alphas", np.arange(-90, 91, 1)),
        )
        for attr, desired in expected_positions:
            np.testing.assert_array_almost_equal(getattr(scan, attr), desired)

    def test_define_scan(self):
        """
        define a theta-phi scan from a positions dictionary and check the positions and the raw data grid.

        the 'a' dimension is not specified. the test expects a single position 0 in that case.
        """
        positions = {'t': np.linspace(1, 4, 4),
                     'e': np.asarray([20]),
                     'p': np.linspace(11, 13, 3)}

        scan = project.Scan()
        scan.define_scan(positions, "C", "1s")

        # unique positions per dimension
        expected_positions = (
            ("energies", np.asarray([20])),
            ("thetas", np.asarray([1, 2, 3, 4])),
            ("phis", np.asarray([11, 12, 13])),
            ("alphas", np.asarray([0])),
        )
        for attr, desired in expected_positions:
            np.testing.assert_array_almost_equal(getattr(scan, attr), desired)

        # full grid of 4 x 3 points: theta is the slow dimension, phi the fast dimension
        expected_raw = (
            ('e', np.ones(12) * 20),
            ('t', np.asarray([1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4])),
            ('p', np.asarray([11, 12, 13, 11, 12, 13, 11, 12, 13, 11, 12, 13])),
            ('a', np.ones(12) * 0),
        )
        for field, desired in expected_raw:
            np.testing.assert_array_almost_equal(scan.raw_data[field], desired)
class TestProject(unittest.TestCase):
def setUp(self):
# before each test method