public distro 2.1.0

This commit is contained in:
2019-07-19 12:54:54 +02:00
parent acea809e4e
commit fbd2d4fa8c
40 changed files with 2813 additions and 345 deletions

358
pmsco/cluster.py Normal file → Executable file
View File

@ -1,12 +1,15 @@
#!/usr/bin/env python
"""
@package pmsco.cluster
cluster tools for MSC and EDAC
cluster building and handling
the Cluster class is provided to facilitate the construction and import/export of clusters.
a cluster can be built by adding single atoms, layers, or a half-space bulk lattice.
the class can import from/export to EDAC, MSC, and XYZ cluster files.
the class can import from/export to various file formats.
XYZ allows for export to 3D visualizers, e.g. Avogadro.
the module has a command line interface to convert cluster files.
@pre requires the periodictable package (https://pypi.python.org/pypi/periodictable)
@code{.sh}
pip install --user periodictable
@ -14,7 +17,11 @@ pip install --user periodictable
@author Matthias Muntwiler
@copyright (c) 2015-18 by Paul Scherrer Institut
@copyright (c) 2015-19 by Paul Scherrer Institut @n
Licensed under the Apache License, Version 2.0 (the "License"); @n
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
"""
from __future__ import absolute_import
@ -34,6 +41,12 @@ FMT_MSC = 1
FMT_EDAC = 2
## XYZ file format identifier
FMT_XYZ = 3
## PHAGEN output file format identifier
FMT_PHAGEN_OUT = 4
## PHAGEN input file format identifier
FMT_PHAGEN_IN = 5
## native file format identifier
FMT_PMSCO = 6
# python version dependent type of chemical symbol
if sys.version_info[0] >= 3:
@ -43,11 +56,14 @@ else:
## numpy.array datatype of Cluster.data array
DTYPE_CLUSTER_INTERNAL = [('i', 'i4'), ('t', 'i4'), ('s', _SYMBOL_TYPE), ('x', 'f4'), ('y', 'f4'), ('z', 'f4'),
('e', 'u1')]
('e', 'u1'), ('q', 'f4'), ('c', 'i4')]
## file format of internal Cluster.data array
FMT_CLUSTER_INTERNAL = ["%5u", "%2u", "%s", "%7.3f", "%7.3f", "%7.3f", "%1u"]
FMT_CLUSTER_INTERNAL = ["%5u", "%2u", "%s", "%5u", "%7.3f", "%7.3f", "%7.3f", "%1u", "%7.3f"]
## field (column) names of internal Cluster.data array
FIELDS_CLUSTER_INTERNAL = ['i', 't', 's', 'x', 'y', 'z', 'e']
FIELDS_CLUSTER_INTERNAL = ['i', 't', 's', 'c', 'x', 'y', 'z', 'e', 'q']
## column names for export
NAMES_CLUSTER_INTERNAL = {'i': 'index', 't': 'element', 's': 'symbol', 'c': 'class', 'x': 'x', 'y': 'y', 'z': 'z',
'e': 'emitter', 'q': 'charge'}
## numpy.array datatype of cluster for MSC cluster file input/output
DTYPE_CLUSTER_MSC = [('i', 'i4'), ('x', 'f4'), ('y', 'f4'), ('z', 'f4'), ('t', 'i4')]
@ -57,11 +73,11 @@ FMT_CLUSTER_MSC = ["%5u", "%7.3f", "%7.3f", "%7.3f", "%2u"]
FIELDS_CLUSTER_MSC = ['i', 'x', 'y', 'z', 't']
## numpy.array datatype of cluster for EDAC cluster file input/output
DTYPE_CLUSTER_EDAC= [('i', 'i4'), ('t', 'i4'), ('x', 'f4'), ('y', 'f4'), ('z', 'f4')]
DTYPE_CLUSTER_EDAC= [('i', 'i4'), ('c', 'i4'), ('x', 'f4'), ('y', 'f4'), ('z', 'f4')]
## file format of EDAC cluster file
FMT_CLUSTER_EDAC = ["%5u", "%2u", "%7.3f", "%7.3f", "%7.3f"]
## field (column) names of EDAC cluster file
FIELDS_CLUSTER_EDAC = ['i', 't', 'x', 'y', 'z']
FIELDS_CLUSTER_EDAC = ['i', 'c', 'x', 'y', 'z']
## numpy.array datatype of cluster for XYZ file input/output
DTYPE_CLUSTER_XYZ= [('s', _SYMBOL_TYPE), ('x', 'f4'), ('y', 'f4'), ('z', 'f4')]
@ -70,6 +86,44 @@ FMT_CLUSTER_XYZ = ["%s", "%10.5f", "%10.5f", "%10.5f"]
## field (column) names of XYZ cluster file
FIELDS_CLUSTER_XYZ = ['s', 'x', 'y', 'z']
## numpy.array datatype of cluster for PHAGEN output file input/output
DTYPE_CLUSTER_PHAGEN_OUT = [('i', 'i4'), ('s', _SYMBOL_TYPE), ('t', 'i4'), ('x', 'f4'), ('y', 'f4'), ('z', 'f4'),
                            ('c', 'i4')]
## file format of PHAGEN cluster output file
FMT_CLUSTER_PHAGEN_OUT = ["%5u", "%s", "%2u", "%7.3f", "%7.3f", "%7.3f", "%5u"]
## field (column) names of PHAGEN cluster output file
FIELDS_CLUSTER_PHAGEN_OUT = ['i', 's', 't', 'x', 'y', 'z', 'c']

## numpy.array datatype of cluster for PHAGEN input file input/output
DTYPE_CLUSTER_PHAGEN_IN = [('s', _SYMBOL_TYPE), ('t', 'i4'), ('x', 'f4'), ('y', 'f4'), ('z', 'f4'), ('q', 'f4')]
## file format of PHAGEN input file, cluster section
FMT_CLUSTER_PHAGEN_IN = ["%s", "%2u", "%7.3f", "%7.3f", "%7.3f", "%7.3f"]
## field (column) names of PHAGEN input file, cluster section
FIELDS_CLUSTER_PHAGEN_IN = ['s', 't', 'x', 'y', 'z', 'q']

## dictionary of supported cluster data types
#
# maps each FMT_ file format identifier to the corresponding DTYPE_CLUSTER_ list.
# the native format (FMT_PMSCO) uses the internal data type.
CLUSTER_DTYPES = {FMT_DEFAULT: DTYPE_CLUSTER_INTERNAL,
                  FMT_MSC: DTYPE_CLUSTER_MSC,
                  FMT_EDAC: DTYPE_CLUSTER_EDAC,
                  FMT_XYZ: DTYPE_CLUSTER_XYZ,
                  FMT_PHAGEN_OUT: DTYPE_CLUSTER_PHAGEN_OUT,
                  FMT_PHAGEN_IN: DTYPE_CLUSTER_PHAGEN_IN,
                  FMT_PMSCO: DTYPE_CLUSTER_INTERNAL}

## dictionary of supported cluster file formats
#
# maps each FMT_ file format identifier to the corresponding FMT_CLUSTER_ list of column format strings.
# the native format (FMT_PMSCO) uses the internal column formats.
CLUSTER_FMTS = {FMT_DEFAULT: FMT_CLUSTER_INTERNAL,
                FMT_MSC: FMT_CLUSTER_MSC,
                FMT_EDAC: FMT_CLUSTER_EDAC,
                FMT_XYZ: FMT_CLUSTER_XYZ,
                FMT_PHAGEN_OUT: FMT_CLUSTER_PHAGEN_OUT,
                FMT_PHAGEN_IN: FMT_CLUSTER_PHAGEN_IN,
                FMT_PMSCO: FMT_CLUSTER_INTERNAL}

## dictionary of supported cluster field names
#
# maps each FMT_ file format identifier to the corresponding FIELDS_CLUSTER_ list of column names.
# the native format (FMT_PMSCO) uses the internal field list.
CLUSTER_FIELDS = {FMT_DEFAULT: FIELDS_CLUSTER_INTERNAL,
                  FMT_MSC: FIELDS_CLUSTER_MSC,
                  FMT_EDAC: FIELDS_CLUSTER_EDAC,
                  FMT_XYZ: FIELDS_CLUSTER_XYZ,
                  FMT_PHAGEN_OUT: FIELDS_CLUSTER_PHAGEN_OUT,
                  FMT_PHAGEN_IN: FIELDS_CLUSTER_PHAGEN_IN,
                  FMT_PMSCO: FIELDS_CLUSTER_INTERNAL}
class Cluster(object):
"""
@ -84,6 +138,8 @@ class Cluster(object):
- y coordinate of the atom position
- z coordinate of the atom position
- emitter flag
- charge/ionicity
- scatterer class
the class also defines methods that add or manipulate atoms of the cluster.
see most importantly the set_rmax, add_atom, add_layer and add_bulk functions.
@ -126,6 +182,8 @@ class Cluster(object):
# @arg @c 'y' (float32) y coordinate of the atom position
# @arg @c 'z' (float32) z coordinate of the atom position
# @arg @c 'e' (uint8) 1 = emitter, 0 = regular atom
# @arg @c 'q' (float32) charge/ionicity
# @arg @c 'c' (int) scatterer class
## @var comment (str)
# one-line comment that can be included in some cluster files
@ -152,6 +210,9 @@ class Cluster(object):
@param cluster: (Cluster) other Cluster object.
"""
self.data = cluster.data.copy()
self.rmax = cluster.rmax
self.dtype = cluster.dtype
self.comment = cluster.comment
def set_rmax(self, r):
"""
@ -166,7 +227,7 @@ class Cluster(object):
"""
self.rmax = r
def build_element(self, index, element_number, x, y, z, emitter):
def build_element(self, index, element_number, x, y, z, emitter, charge=0., scatterer=0):
"""
build a tuple in the format of the internal data array.
@ -177,12 +238,16 @@ class Cluster(object):
@param x, y, z: (float) atom coordinates in the cluster
@param emitter: (int or bool) True = emitter, False = scatterer
@param charge: (float) ionicity. default = 0
@param scatterer: (int) scatterer class. default = 0.
"""
symbol = pt.elements[element_number].symbol
element = (index, element_number, symbol, x, y, z, int(emitter))
element = (index, element_number, symbol, x, y, z, int(emitter), charge, scatterer)
return element
def add_atom(self, atomtype, v_pos, is_emitter):
def add_atom(self, atomtype, v_pos, is_emitter=False, charge=0.):
"""
add a single atom to the cluster.
@ -191,11 +256,15 @@ class Cluster(object):
@param v_pos: (numpy.ndarray, shape = (3)) position vector
@param is_emitter: (int or bool) True = emitter, False = scatterer
@param charge: (float) ionicity. default = 0
@return array index of added atom
"""
n0 = self.data.shape[0] + 1
element = self.build_element(n0, atomtype, v_pos[0], v_pos[1], v_pos[2], int(is_emitter))
self.data = np.append(self.data, np.array(element,
dtype=self.data.dtype))
element = self.build_element(n0, atomtype, v_pos[0], v_pos[1], v_pos[2], is_emitter, charge)
self.data = np.append(self.data, np.array(element, dtype=self.data.dtype))
return n0 - 1
def add_layer(self, atomtype, v_pos, v_lat1, v_lat2):
"""
@ -290,19 +359,21 @@ class Cluster(object):
source = cluster.data.copy()
if check_rmax and source.shape[0] > 0:
source_xyz = source[['x', 'y', 'z']].copy()
source_xyz = source_xyz.view((source_xyz.dtype[0], len(source_xyz.dtype.names)))
source_xyz = cluster.get_positions()
b_rmax = np.linalg.norm(source_xyz, axis=1) <= self.rmax
idx = np.where(b_rmax)
source = source[idx]
data = np.append(data, source)
if check_unique and data.shape[0] > 0:
data_xyz = data[['x', 'y', 'z']].copy()
data_xyz = data_xyz.view((data_xyz.dtype[0], len(data_xyz.dtype.names)))
tol_xyz = np.round(data_xyz / tol)
uni_xyz = tol_xyz.view(tol_xyz.dtype.descr * 3)
_, idx = np.unique(uni_xyz, return_index=True)
data_xyz = np.empty((data.shape[0], 3))
data_xyz[:, 0] = data['x']
data_xyz[:, 1] = data['y']
data_xyz[:, 2] = data['z']
tol *= 2
uni_xyz = np.round(data_xyz / tol)
# this requires numpy 1.13 or later
_, idx = np.unique(uni_xyz, return_index=True, axis=0)
data = data[np.sort(idx)]
self.data = data
@ -322,8 +393,10 @@ class Cluster(object):
the returned coordinates may not be identical to any atom coordinate of a layer
but deviate up to the given tolerance.
"""
self_z = self.data['z'].view(np.float32).reshape(self.data.shape)
z2 = np.round(self_z.copy() / tol)
tol *= 2
self_z = np.empty(self.data.shape, np.float32)
self_z[:] = self.data['z']
z2 = np.round(self_z / tol)
layers = np.unique(z2) * tol
return layers
@ -338,7 +411,8 @@ class Cluster(object):
by default (element = 0), all atoms are moved.
@return: (numpy.ndarray) indices of the atoms that have been shifted.
"""
self_z = self.data['z'].view(np.float32).reshape(self.data.shape)
self_z = np.empty(self.data.shape, np.float32)
self_z[:] = self.data['z']
b_z = self_z <= z_cut
b_all = b_z
@ -434,12 +508,18 @@ class Cluster(object):
"""
find all atoms which occupy a given position.
@param pos: (numpy.array, shape = (3)) position vector.
@param pos: position vector.
this can be a numpy.ndarray with shape (3)
or any type where pos[0] represents the x-coordinate, pos[1] y, and pos[2] z.
@param tol: (float) matching tolerance per coordinate.
@return numpy.array of indices which match v_pos.
"""
if isinstance(pos, np.ndarray):
assert pos.shape == (3,)
else:
pos = np.array((pos[0], pos[1], pos[2]))
b2 = np.abs(pos - self.get_positions()) < tol
b1 = np.all(b2, axis=1)
idx = np.where(b1)
@ -463,8 +543,9 @@ class Cluster(object):
@return numpy.array of indices which match v_pos.
"""
pos_xy = pos[0:2]
self_xy = self.data[['x', 'y']].copy()
self_xy = self_xy.view((self_xy.dtype[0], len(self_xy.dtype.names)))
self_xy = np.empty((self.data.shape[0], 2), np.float32)
self_xy[:, 0] = self.data['x']
self_xy[:, 1] = self.data['y']
b_xy = np.linalg.norm(self_xy - pos_xy, axis=1) <= r_xy
pos_z = pos[2]
@ -497,8 +578,9 @@ class Cluster(object):
@return: None
"""
self_xy = self.data[['x', 'y']].copy()
self_xy = self_xy.view((self_xy.dtype[0], len(self_xy.dtype.names)))
self_xy = np.empty((self.data.shape[0], 2), np.float32)
self_xy[:, 0] = self.data['x']
self_xy[:, 1] = self.data['y']
b_xy = np.linalg.norm(self_xy, axis=1) <= r_xy
self_z = self.data['z']
@ -545,8 +627,7 @@ class Cluster(object):
@return: None
"""
self_xyz = self.data[['x', 'y', 'z']].copy()
self_xyz = self_xyz.view((self_xyz.dtype[0], len(self_xyz.dtype.names)))
self_xyz = self.get_positions()
b_xyz = np.linalg.norm(self_xyz, axis=1) <= radius
idx = np.where(b_xyz)
self.data = self.data[idx]
@ -562,7 +643,8 @@ class Cluster(object):
@return: None
"""
coord = self.data[axis].view(np.float32).reshape(self.data.shape)
coord = np.empty(self.data.shape, np.float32)
coord[:] = self.data[axis]
sel = np.abs(coord - center) <= depth / 2
idx = np.where(sel)
self.data = self.data[idx]
@ -617,15 +699,17 @@ class Cluster(object):
def get_positions(self):
"""
get an array of the atom coordinates.
get the atom coordinates in a two-dimensional array.
the returned array is an independent copy of the original data.
changes will not affect the original cluster.
@return numpy.ndarray, shape = (N,3)
"""
pos = self.data[['x', 'y', 'z']].copy()
pos = pos.view((pos.dtype[0], len(pos.dtype.names)))
pos = np.empty((self.data.shape[0], 3), np.float32)
pos[:, 0] = self.data['x']
pos[:, 1] = self.data['y']
pos[:, 2] = self.data['z']
return pos
def set_positions(self, positions):
@ -689,14 +773,16 @@ class Cluster(object):
rec = self.data[index]
return rec['s']
def get_emitters(self):
def get_emitters(self, fields):
"""
get a list of all emitters.
@return list of tuples (x, y, z, atomtype)
@param fields: list of field (column) names to return
@return list of tuples. each tuple contains the values of the requested fields.
"""
idx = self.data['e'] != 0
ems = self.data[['x', 'y', 'z', 't']][idx]
ems = self.data[fields][idx]
return [tuple(em) for em in ems]
def get_emitter_count(self):
@ -711,10 +797,22 @@ class Cluster(object):
def load_from_file(self, f, fmt=FMT_DEFAULT):
"""
load a cluster from a file created by the scattering program.
the file formats differ in the columns that they contain.
only the 'x', 'y', 'z' coordinates are common to all formats.
at least one of the 's' and 't' columns must be present.
missing columns are initialized as follows.
@arg 'i': reset to a 1-based sequential index (@ref update_index).
@arg 's': derived from the 't' column (@ref update_symbols).
@arg 't': derived from the 's' column (@ref update_atomtypes).
@arg 'e': set to 0.
@arg 'c': set equal to the 't' column (@ref init_atomclasses).
@arg 'q': set to 0.
@param f (string/handle): path name or open file handle of the cluster file.
@param f: path name or open file handle of the cluster file.
@param fmt (int): file format.
@param fmt: file format.
must be one of the FMT_ constants.
if FMT_DEFAULT, self.file_format is used.
@ -735,12 +833,25 @@ class Cluster(object):
dtype = DTYPE_CLUSTER_XYZ
fields = FIELDS_CLUSTER_XYZ
sh = 2
elif fmt == FMT_PHAGEN_OUT:
dtype = DTYPE_CLUSTER_PHAGEN_OUT
fields = FIELDS_CLUSTER_PHAGEN_OUT
sh = 1
elif fmt == FMT_PHAGEN_IN:
dtype = DTYPE_CLUSTER_PHAGEN_IN
fields = FIELDS_CLUSTER_PHAGEN_IN
sh = 0
elif fmt == FMT_PMSCO:
dtype = DTYPE_CLUSTER_INTERNAL
fields = FIELDS_CLUSTER_INTERNAL
sh = 1
else:
dtype = DTYPE_CLUSTER_XYZ
fields = FIELDS_CLUSTER_XYZ
sh = 2
raise ValueError("unknown file format {}".format(fmt))
data = np.genfromtxt(f, dtype=dtype, skip_header=sh)
if fmt == FMT_PHAGEN_IN and data['t'][-1] < 1:
data = data[:-1]
self.data = np.empty(data.shape, dtype=self.dtype)
self.data['x'] = data['x']
self.data['y'] = data['y']
@ -753,14 +864,23 @@ class Cluster(object):
self.data['t'] = data['t']
if 's' in fields:
self.data['s'] = data['s']
else:
elif 't' in fields:
self.update_symbols()
if 't' not in fields:
self.update_atomtypes()
if 's' in fields:
self.update_atomtypes()
if 'e' in fields:
self.data['e'] = data['e']
else:
self.data['e'] = 0
if 'c' in fields:
self.data['c'] = data['c']
else:
self.data['c'] = 0
if 'q' in fields:
self.data['q'] = data['q']
else:
self.data['q'] = 0.
pos = self.get_positions()
# note: np.linalg.norm does not accept axis argument in version 1.7
@ -788,6 +908,35 @@ class Cluster(object):
for atom in self.data:
atom['t'] = pt.elements.symbol(atom['s'].strip()).number
def init_atomclasses(self, field_or_value='t', default_only=False):
    """
    initialize the atom classes ('c' column) from another column or from a constant.

    atom classes identify the atomic scattering potential or scattering factors
    to be used in the multiple scattering program.

    if the scattering factors are calculated in the PMSCO process (by EDAC or PHAGEN),
    the atom classes must be set equal to the element type
    or left at the default value 0 in which case PMSCO sets the correct values.

    if the scattering factors are loaded from existing files,
    the atom class corresponds to the key of the pmsco.project.Params.phase_files dictionary.
    in this case the meaning of the class value is up to the project,
    and the class must be set either by the cluster generator
    or the project's after_atomic_scattering hook.

    @param field_or_value: name of a cluster data field, e.g. 't', or an integer constant.
        a string is interpreted as a field name and that column is copied into 'c'.
        any other value is assigned to the 'c' column of all atoms.

    @param default_only: if True, the classes are modified
        only if all of them are at their default value (0).

    @return None
    """
    # leave existing (non-zero) classes alone if the caller asked for default_only
    if default_only and np.sum(np.abs(self.data['c'])) != 0:
        return

    if isinstance(field_or_value, str):
        new_classes = self.data[field_or_value]
    else:
        new_classes = field_or_value
    self.data['c'] = new_classes
def update_index(self):
"""
update the index column.
@ -795,10 +944,44 @@ class Cluster(object):
if you have modified the order or number of elements in the self.data array directly,
you may need to re-index the atoms if your code uses functions that rely on the index.
@return: None
@return None
"""
self.data['i'] = np.arange(1, self.data.shape[0] + 1)
def update_atoms(self, clu, fields):
    """
    update atom properties from another cluster.

    this method copies selected fields from another cluster.
    the other cluster must contain the same atoms (same coordinates) in a possibly random order.
    the atoms of this and the other cluster are matched up by sorting them by coordinate.

    atomic scattering calculators often change the order of atoms in a cluster based on symmetry,
    and return atom classes versus atomic coordinates.
    this method allows to import the atom classes into the original cluster.

    the method checks that the other cluster contains the same number of atoms.
    it does not check that the clusters contain the same atomic positions.
    linear translations are acceptable.

    @param clu: cluster.Cluster object

    @param fields: subset of field names out of FIELDS_CLUSTER_INTERNAL.
        'i', 'x', 'y', 'z' are ignored.
        the set can be specified in any type that converts into a set of strings.

    @return: None

    @raise AssertionError if the clusters do not contain the same number of atoms
    """
    # explicit raise rather than an assert statement:
    # assert statements are removed when python runs with -O, which would disable the check.
    if self.data.shape != clu.data.shape:
        raise AssertionError("clusters do not contain the same number of atoms: {0} vs. {1}".format(
            self.data.shape, clu.data.shape))

    # index and coordinates identify the atoms and must never be overwritten
    fields = set(fields) - {'i', 'x', 'y', 'z'}

    # sorting both clusters by the same keys pairs up corresponding atoms
    common_order = ('z', 'y', 'x')
    index_self = np.argsort(self.data, order=common_order)
    index_other = np.argsort(clu.data, order=common_order)

    for field in fields:
        self.data[field][index_self] = clu.data[field][index_other]
def save_to_file(self, f, fmt=FMT_DEFAULT, comment="", emitters_only=False):
"""
save the cluster to a file which can be read by the scattering program.
@ -846,10 +1029,21 @@ class Cluster(object):
file_format = FMT_CLUSTER_XYZ
fields = FIELDS_CLUSTER_XYZ
header = "{nat}\n{com}".format(nat=data.shape[0], com=comment)
elif fmt == FMT_PHAGEN_IN:
file_format = FMT_CLUSTER_PHAGEN_IN
fields = FIELDS_CLUSTER_PHAGEN_IN
header = None
elif fmt == FMT_PHAGEN_OUT:
file_format = FMT_CLUSTER_PHAGEN_OUT
fields = FIELDS_CLUSTER_PHAGEN_OUT
header = ""
elif fmt == FMT_PMSCO:
file_format = FMT_CLUSTER_INTERNAL
fields = FIELDS_CLUSTER_INTERNAL
names = NAMES_CLUSTER_INTERNAL
header = "# " + " ".join([names[field] for field in fields])
else:
file_format = FMT_CLUSTER_XYZ
fields = FIELDS_CLUSTER_XYZ
header = "{nat}\n{com}".format(nat=data.shape[0], com=comment)
raise ValueError("unknown file format {}".format(fmt))
data = data[fields]
np.savetxt(f, data, fmt=file_format, header=header, comments="")
@ -996,3 +1190,67 @@ class LegacyClusterGenerator(ClusterGenerator):
redirect the call to the corresponding project method.
"""
return self.project.create_cluster(model, index)
def parse_cli(argv=None):
    """
    parse the command line

    the command line takes four positional arguments:
    input format, input file, output format and output file.
    the formats must be one of PMSCO, MSC, EDAC, XYZ, PHAGEN_OUT, PHAGEN_IN.

    @param argv: (optional) list of argument strings to parse.
        by default (None), the arguments are taken from the command line of the process (sys.argv).

    @return: Namespace object created by the argument parser.
        it holds the attributes input_format, input_file, output_format and output_file.
    """
    import argparse

    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        description="""
        cluster conversion
        """)

    format_choices = ["PMSCO", "MSC", "EDAC", "XYZ", "PHAGEN_OUT", "PHAGEN_IN"]
    parser.add_argument('input_format',
                        choices=format_choices,
                        help="format of input file")
    parser.add_argument('input_file',
                        help="path and name of input file")
    parser.add_argument('output_format',
                        choices=format_choices,
                        help="format of output file")
    parser.add_argument('output_file',
                        help="path and name of output file")

    # parse_args(None) falls back to sys.argv[1:], which is the original behaviour.
    args = parser.parse_args(argv)
    return args
def convert_cli(args):
    """
    convert a cluster file from one format into another.

    this function is part of the command line interface.
    it loads the input file into a new Cluster object and saves it in the requested output format.

    @param args: command line arguments.
        the object must provide the attributes
        input_format, input_file, output_format and output_file.
        the format names are mapped to the module-level constants FMT_<NAME>.

    @return: None
    """
    def format_id(name):
        # look up the module-level FMT_ constant that belongs to a format name, e.g. "XYZ" -> FMT_XYZ
        return globals()["FMT_" + name.upper()]

    cluster = Cluster()
    cluster.file_format = FMT_PMSCO

    source_format = format_id(args.input_format)
    target_format = format_id(args.output_format)

    cluster.load_from_file(args.input_file, source_format)
    cluster.save_to_file(args.output_file, target_format)
def main_cli():
    """
    command line interface to convert cluster files

    parses the command line and passes the result to the converter.
    see @ref convert_cli.

    @return: None
    """
    convert_cli(parse_cli())
# run the cluster conversion tool if the module is executed as a script,
# and report success to the calling shell.
if __name__ == '__main__':
    main_cli()
    sys.exit(0)