pmsco-public/pmsco/cluster.py
matthias muntwiler bbd16d0f94 add files for public distribution
based on internal repository 0a462b6 2017-11-22 14:41:39 +0100
2017-11-22 14:55:20 +01:00

786 lines
27 KiB
Python

"""
@package pmsco.cluster
cluster tools for MSC and EDAC
the Cluster class is provided to facilitate the construction and import/export of clusters.
a cluster can be built by adding single atoms, layers, or a half-space bulk lattice.
the class can import from/export to EDAC, MSC, and XYZ cluster files.
XYZ allows for export to 3D visualizers, e.g. Avogadro.
@pre requires the periodictable package (https://pypi.python.org/pypi/periodictable)
@code{.sh}
pip install --user periodictable
@endcode
@author Matthias Muntwiler
@copyright (c) 2015 by Paul Scherrer Institut
"""
import math
import numpy as np
import periodictable as pt
## default file format identifier
FMT_DEFAULT = 0
## MSC file format identifier
FMT_MSC = 1
## EDAC file format identifier
FMT_EDAC = 2
## XYZ file format identifier
FMT_XYZ = 3
## numpy.array datatype of Cluster.data array
DTYPE_CLUSTER_INTERNAL = [('i','i4'), ('t','i4'), ('s','a2'), ('x','f4'), ('y','f4'), ('z','f4'), ('e','u1')]
## file format of internal Cluster.data array
FMT_CLUSTER_INTERNAL = ["%5u", "%2u", "%s", "%7.3f", "%7.3f", "%7.3f", "%1u"]
## field (column) names of internal Cluster.data array
FIELDS_CLUSTER_INTERNAL = ['i','t','s','x','y','z','e']
## numpy.array datatype of cluster for MSC cluster file input/output
DTYPE_CLUSTER_MSC = [('i','i4'), ('x','f4'), ('y','f4'), ('z','f4'), ('t','i4')]
## file format of MSC cluster file
FMT_CLUSTER_MSC = ["%5u", "%7.3f", "%7.3f", "%7.3f", "%2u"]
## field (column) names of MSC cluster file
FIELDS_CLUSTER_MSC = ['i','x','y','z','t']
## numpy.array datatype of cluster for EDAC cluster file input/output
DTYPE_CLUSTER_EDAC= [('i','i4'), ('t','i4'), ('x','f4'), ('y','f4'), ('z','f4')]
## file format of EDAC cluster file
FMT_CLUSTER_EDAC = ["%5u", "%2u", "%7.3f", "%7.3f", "%7.3f"]
## field (column) names of EDAC cluster file
FIELDS_CLUSTER_EDAC = ['i','t','x','y','z']
## numpy.array datatype of cluster for XYZ file input/output
DTYPE_CLUSTER_XYZ= [('s','a2'), ('x','f4'), ('y','f4'), ('z','f4')]
## file format of XYZ cluster file
FMT_CLUSTER_XYZ = ["%s", "%10.5f", "%10.5f", "%10.5f"]
## field (column) names of XYZ cluster file
FIELDS_CLUSTER_XYZ = ['s','x','y','z']
class Cluster(object):
"""
Represents a cluster of atoms by their coordinates and chemical element.
the object stores the following information per atom in the @ref data array:
- sequential atom index (1-based)
- atom type (chemical element number)
- chemical element symbol
- x coordinate of the atom position
- t coordinate of the atom position
- z coordinate of the atom position
- emitter flag
the class also defines methods that add or manipulate atoms of the cluster.
see most importantly the set_rmax, add_atom, add_layer and add_bulk functions.
emitters can be flagged by the set_emitter method.
you may also manipulate the data array directly.
in this case, be sure to keep the data array consistent.
the update methods can help to recreate the index, atom type or symbol columns.
the class can also load and save files in some simple formats.
"""
## @var rmax
# maximum distance of atoms from the origin.
#
# float, default = 0
#
# this parameter restricts the addition of new atoms.
# changing the parameter does not affect existing atoms.
# the default is 0 (no atom will be added!).
# you must set this parameter explicitly!
## @var dtype
# data type of the internal numpy.ndarray.
## @var file_format
# default file format.
#
# must be one of the FMT_MSC, FMT_EDAC, FMT_XYZ constants.
# the initial value is FMT_XYZ.
## @var data
# structured numpy array holding the atom positions.
#
# the columns of the array are:
# @arg @c 'i' (int) atom index (1-based)
# @arg @c 't' (int) atom type (chemical element number)
# @arg @c 's' (string) chemical element symbol
# @arg @c 'x' (float32) x coordinate of the atom position
# @arg @c 'y' (float32) t coordinate of the atom position
# @arg @c 'z' (float32) z coordinate of the atom position
# @arg @c 'e' (uint8) 1 = emitter, 0 = regular atom
## @var comment (str)
# one-line comment that can be included in some cluster files
def __init__(self):
self.data = None
self.rmax = 0.0
self.dtype = DTYPE_CLUSTER_INTERNAL
self.file_format = FMT_XYZ
self.comment = ""
self.clear()
def clear(self):
"""
Remove all atoms from the cluster.
"""
n_atoms = 0
self.data = np.zeros(n_atoms, dtype=self.dtype)
def copy_from(self, cluster):
"""
Copy the data from another cluster.
@param cluster (Cluster): other Cluster object.
"""
self.data = cluster.data.copy()
def set_rmax(self, r):
"""
set rmax, the maximum distance of atoms from the origin.
atoms with norm greater than rmax will not be added to the cluster
by the add_layer() and add_bulk() methods.
existing atoms are not affected when changing rmax.
you must set this parameter explicitly, as the default value is 0
(no atom will be added)!
"""
self.rmax = r
def build_element(self, index, element_number, x, y, z, emitter):
"""
build a tuple in the format of the internal data array.
@param index: (int) index
@param element_number: (int) chemical element number
@param x, y, z: (float) atom coordinates in the cluster
@param emitter: (uint) 1 = emitter, 0 = regular
"""
symbol = pt.elements[element_number].symbol
element = (index, element_number, symbol, x, y, z, emitter)
return element
def add_atom(self, atomtype, v_pos, is_emitter):
"""
add a single atom to the cluster.
@param atomtype: (int) chemical element number
@param v_pos: (numpy.ndarray, shape = (3)) position vector
@param is_emitter: (uint) 1 = emitter, 0 = regular
"""
n0 = self.data.shape[0] + 1
element = self.build_element(n0, atomtype, v_pos[0], v_pos[1], v_pos[2], is_emitter)
self.data = np.append(self.data, np.array(element,
dtype=self.data.dtype))
def add_layer(self, atomtype, v_pos, v_lat1, v_lat2):
"""
add a layer of atoms to the cluster.
the layer is expanded up to the limit given by
self.rmax (maximum distance from the origin).
all atoms are non-emitters.
@param atomtype: (int) chemical element number
@param v_pos: (numpy.ndarray, shape = (3))
position vector of the first atom (basis vector)
@param v_lat1, v_lat2: (numpy.ndarray, shape = (3))
lattice vectors.
"""
r_great = max(self.rmax, np.linalg.norm(v_pos))
n0 = self.data.shape[0] + 1
n1 = max(int(r_great / np.linalg.norm(v_lat1)) + 1, 3) * 2
n2 = max(int(r_great / np.linalg.norm(v_lat2)) + 1, 3) * 2
nn = 0
buf = np.empty((2 * n1 + 1) * (2 * n2 + 1), dtype=self.dtype)
for i1 in range(-n1, n1 + 1):
for i2 in range(-n2, n2 + 1):
v = v_pos + v_lat1 * i1 + v_lat2 * i2
if np.linalg.norm(v) <= self.rmax:
buf[nn] = self.build_element(nn + n0, atomtype, v[0], v[1], v[2], 0)
nn += 1
buf = np.resize(buf, nn)
self.data = np.append(self.data, buf)
def add_bulk(self, atomtype, v_pos, v_lat1, v_lat2, v_lat3, z_surf=0.0):
"""
add bulk atoms to the cluster.
the lattice is expanded up to the limits given by
self.rmax (maximum distance from the origin)
and z_surf (position of the surface).
all atoms are non-emitters.
@param atomtype: (int) chemical element number
@param v_pos: (numpy.ndarray, shape = (3))
position vector of the first atom (basis vector)
@param v_lat1, v_lat2, v_lat3: (numpy.ndarray, shape = (3))
lattice vectors.
@param z_surf: (float) position of surface.
atoms with z > z_surf are not added.
"""
r_great = max(self.rmax, np.linalg.norm(v_pos))
n0 = self.data.shape[0] + 1
n1 = max(int(r_great / np.linalg.norm(v_lat1)) + 1, 4) * 3
n2 = max(int(r_great / np.linalg.norm(v_lat2)) + 1, 4) * 3
n3 = max(int(r_great / np.linalg.norm(v_lat3)) + 1, 4) * 3
nn = 0
buf = np.empty((2 * n1 + 1) * (2 * n2 + 1) * (n3 + 1), dtype=self.dtype)
for i1 in range(-n1, n1 + 1):
for i2 in range(-n2, n2 + 1):
for i3 in range(-n3, n3 + 1):
v = v_pos + v_lat1 * i1 + v_lat2 * i2 + v_lat3 * i3
if np.linalg.norm(v) <= self.rmax and v[2] <= z_surf:
buf[nn] = self.build_element(nn + n0, atomtype, v[0], v[1], v[2], 0)
nn += 1
buf = np.resize(buf, nn)
self.data = np.append(self.data, buf)
def add_cluster(self, cluster, check_rmax=False, check_unique=False, tol=0.001):
"""
add atoms from another cluster object.
@note the order of atoms in the internal data array may change during this operation.
the atom index is updated.
@param cluster: Cluster object to be added.
@param check_rmax: if True, atoms outside self.rmax are not added.
if False (default), all atoms of the other cluster are added.
@param check_unique: if True, atoms occupying the same position as an existing atom will not be added.
if False (default), all atoms are added even if they occupy the same position.
@param tol: tolerance for checking uniqueness.
positions of two atoms are considered equal if all coordinates lie within the tolerance interval.
@return: None
"""
assert isinstance(cluster, Cluster)
data = self.data.copy()
source = cluster.data.copy()
if check_rmax and source.shape[0] > 0:
source_xyz = source[['x', 'y', 'z']].copy()
source_xyz = source_xyz.view((source_xyz.dtype[0], len(source_xyz.dtype.names)))
b_rmax = np.linalg.norm(source_xyz, axis=1) <= self.rmax
idx = np.where(b_rmax)
source = source[idx]
data = np.append(data, source)
if check_unique and data.shape[0] > 0:
data_xyz = data[['x', 'y', 'z']].copy()
data_xyz = data_xyz.view((data_xyz.dtype[0], len(data_xyz.dtype.names)))
tol_xyz = np.round(data_xyz / tol)
uni_xyz = tol_xyz.view(tol_xyz.dtype.descr * 3)
_, idx = np.unique(uni_xyz, return_index=True)
data = data[np.sort(idx)]
self.data = data
self.update_index()
def get_z_layers(self, tol=0.001):
"""
return the z-coordinates of atomic layers.
the layers are stacked in the z-direction.
the function gathers unique z-coordinates.
coordinates which are within the given tolerance are assigned to the same layer.
@param tol: tolerance
@return: (numpy.ndarray) z-coordinates of the layers.
the coordinates are numerically ordered, the top layer appears last.
the returned coordinates may not be identical to any atom coordinate of a layer
but deviate up to the given tolerance.
"""
self_z = self.data['z'].view(np.float32).reshape(self.data.shape)
z2 = np.round(self_z.copy() / tol)
layers = np.unique(z2) * tol
return layers
def relax(self, z_cut, z_shift, element=0):
"""
shift atoms below a certain z coordinate by a fixed distance in the z direction.
@param z_cut: atoms below this z coordinate are shifted.
@param z_shift: amount of shift in z direction
(positive to move towards the surface, negative to move into the bulk).
@param element: (int) chemical element number if atoms of a specific element should be affected.
by default (element = 0), all atoms are moved.
@return: (numpy.ndarray) indices of the atoms that have been shifted.
"""
self_z = self.data['z'].view(np.float32).reshape(self.data.shape)
b_z = self_z <= z_cut
b_all = b_z
if element:
try:
b_el = self.data['t'] == int(element)
except ValueError:
b_el = self.data['s'] == element
b_all = np.all([b_z, b_el], axis=0)
idx = np.where(b_all)
self.data['z'][idx] += z_shift
return idx
def matrix_transform(self, matrix):
"""
apply a transformation matrix to each atom of the cluster.
the transformed atom positions are calculated as v = R * transpose(v)
@param matrix: transformation matrix
@return: None
"""
for atom in self.data:
v = np.matrix([atom['x'], atom['y'], atom['z']])
w = matrix * v.transpose()
atom['x'] = float(w[0])
atom['y'] = float(w[1])
atom['z'] = float(w[2])
def rotate_x(self, angle):
"""
rotate cluster about the surface normal axis
@param angle (float) in degrees
"""
angle = math.radians(angle)
s = math.sin(angle)
c = math.cos(angle)
matrix = np.matrix([[1, 0, 0], [0, c, -s], [0, s, c]])
self.matrix_transform(matrix)
def rotate_y(self, angle):
"""
rotate cluster about the surface normal axis
@param angle (float) in degrees
"""
angle = math.radians(angle)
s = math.sin(angle)
c = math.cos(angle)
matrix = np.matrix([[c, 0, s], [0, 1, 0], [-s, 0, c]])
self.matrix_transform(matrix)
def rotate_z(self, angle):
"""
rotate cluster about the surface normal axis
@param angle (float) in degrees
"""
angle = math.radians(angle)
s = math.sin(angle)
c = math.cos(angle)
matrix = np.matrix([[c, -s, 0], [s, c, 0], [0, 0, 1]])
self.matrix_transform(matrix)
def find_positions(self, pos, tol=0.001):
"""
find all atoms which occupy a given position.
@param pos: (numpy.array, shape = (3)) position vector.
@param tol: (float) matching tolerance per coordinate.
@return numpy.array of indices which match v_pos.
"""
b2 = np.abs(pos - self.get_positions()) < tol
b1 = np.all(b2, axis=1)
idx = np.where(b1)
return idx[0]
def find_index_cylinder(self, pos, r_xy, r_z, element):
"""
find atoms of a given element within a cylindrical volume and return their indices.
@param pos: (numpy.array, shape = (3)) center position of the cylinder.
@param r_xy: (float) radius of the cylinder.
returned atoms must match |atom(x,y) - pos(x,y)| <= r_xy.
@param r_z: (float) half height of the cylinder.
returned atoms must match |atom(z) - pos(z)| <= r_z.
@param element: (str or int) element symbol or atomic number.
if None, the element is not checked.
@return numpy.array of indices which match v_pos.
"""
pos_xy = pos[0:2]
self_xy = self.data[['x', 'y']].copy()
self_xy = self_xy.view((self_xy.dtype[0], len(self_xy.dtype.names)))
b_xy = np.linalg.norm(self_xy - pos_xy, axis=1) <= r_xy
pos_z = pos[2]
self_z = self.data['z']
b_z = np.abs(self_z - pos_z) <= r_z
if element is not None:
try:
b_el = self.data['t'] == int(element)
except ValueError:
b_el = self.data['s'] == element
b_all = np.all([b_xy, b_z, b_el], axis=0)
else:
b_all = np.all([b_xy, b_z], axis=0)
idx = np.where(b_all)
return idx[0]
def trim_cylinder(self, r_xy, r_z):
"""
remove atoms outside a given cylinder.
the cylinder is centered at the origin.
@param r_xy: (float) radius of the cylinder.
atoms to keep must match |atom(x,y)| <= r_xy.
@param r_z: (float) half height of the cylinder.
atoms to keep must match |atom(z)| <= r_z.
@return: None
"""
self_xy = self.data[['x', 'y']].copy()
self_xy = self_xy.view((self_xy.dtype[0], len(self_xy.dtype.names)))
b_xy = np.linalg.norm(self_xy, axis=1) <= r_xy
self_z = self.data['z']
b_z = np.abs(self_z) <= r_z
b_all = np.all([b_xy, b_z], axis=0)
idx = np.where(b_all)
self.data = self.data[idx]
self.update_index()
def trim_sphere(self, radius):
"""
remove atoms outside a given sphere.
the sphere is centered at the origin.
@param radius: (float) radius of the sphere.
atoms to keep must match |atom(x,y,z)| <= radius.
@return: None
"""
self_xyz = self.data[['x', 'y', 'z']].copy()
self_xyz = self_xyz.view((self_xyz.dtype[0], len(self_xyz.dtype.names)))
b_xyz = np.linalg.norm(self_xyz, axis=1) <= radius
idx = np.where(b_xyz)
self.data = self.data[idx]
self.update_index()
def trim_slab(self, axis, center, depth):
"""
remove atoms outside a slab that is parallel to one of the coordinate planes.
@param axis: axis to trim: 'x', 'y' or 'z'.
@param center: center position of the slab.
@param depth: thickness of the slab.
@return: None
"""
coord = self.data[axis].view(np.float32).reshape(self.data.shape)
sel = np.abs(coord - center) <= depth / 2
idx = np.where(sel)
self.data = self.data[idx]
self.update_index()
def set_emitter(self, pos=None, idx=-1, tol=0.001):
"""
select an atom as emitter.
the emitter atom can be specified by position or index.
either one of the pos or idx arguments must be specified.
@param idx: (int) array index of the atom.
@param pos: (numpy.array, shape = (3)) position vector.
@param tol: (float) matching tolerance per component if pos argument is used.
@raise IndexError if the position cannot be found
"""
if pos is not None:
ares = self.find_positions(pos, tol)
idx = ares[0]
item = self.data[idx]
item['e'] = 1
def move_to_first(self, pos=None, idx=0, tol=0.001):
"""
move an atom to the first position.
the emitter atom can be specified by position or index.
either one of the pos or idx arguments must be specified.
@param idx: (int) array index of the atom.
must be greater than 1 to have an effect.
@param pos: (numpy.array, shape = (3)) position vector.
@param tol: (float) matching tolerance per component if pos argument is used.
@raise IndexError if the position cannot be found
"""
if pos is not None:
ares = self.find_positions(pos, tol)
idx = ares[0]
if idx:
em = self.data[idx]
self.data = np.delete(self.data, idx)
self.data = np.insert(self.data, 0, em)
self.update_index()
def get_positions(self):
"""
get an array of the atom coordinates.
the returned array is an independent copy of the original data.
changes will not affect the original cluster.
@return numpy.ndarray, shape = (N,3)
"""
pos = self.data[['x', 'y', 'z']].copy()
pos = pos.view((pos.dtype[0], len(pos.dtype.names)))
return pos
def set_positions(self, positions):
"""
set atom coordinates from an array of shape (N,3).
this method can be used on a modified array obtained from get_positions.
N must be the number of atoms defined in the cluster.
@param positions: numpy.ndarray of shape (N,3) where N is the number of atoms in this cluster.
@return: None
@raise AssertionError if the array sizes do not match.
"""
assert isinstance(positions, np.ndarray)
assert positions.shape == (self.data.shape[0], 3)
self.data['x'] = positions[:, 0]
self.data['y'] = positions[:, 1]
self.data['z'] = positions[:, 2]
def get_position(self, index):
"""
get the position of a single atom.
@param index: (int) index of the atom.
@return numpy.array, shape = (3): position vector.
the array instance is independent from the original array.
"""
rec = self.data[index]
return np.array((rec['x'], rec['y'], rec['z']))
def get_atom_count(self):
"""
get the number of atoms (positions) in the cluster.
@return the number of atoms in the cluster.
"""
return self.data.shape[0]
def get_atomtype(self, index):
"""
get the chemical element number of an atom.
@param index: (int) index of the atom.
@return int: chemical element number.
"""
rec = self.data[index]
return rec['t']
def get_symbol(self, index):
"""
get the chemical element symbol of an atom.
@param index: (int) index of the atom.
@return string: chemical element symbol.
"""
rec = self.data[index]
return rec['s']
def get_emitters(self):
"""
get a list of all emitters.
@return list of tuples (x, y, z, atomtype)
"""
idx = self.data['e'] != 0
ems = self.data[['x', 'y', 'z', 't']][idx]
return map(tuple, ems)
def get_emitter_count(self):
"""
get the number of emitters in the cluster.
@return the number of atoms marked as emitter.
"""
idx = self.data['e'] != 0
return np.sum(idx)
def load_from_file(self, f, fmt=FMT_DEFAULT):
"""
load a cluster from a file created by the scattering program.
@param f (string/handle): path name or open file handle of the cluster file.
@param fmt (int): file format.
must be one of the FMT_ constants.
if FMT_DEFAULT, self.file_format is used.
@remark if the filename ends in .gz, the file is loaded from compressed gzip format
"""
if fmt == FMT_DEFAULT:
fmt = self.file_format
if fmt == FMT_MSC:
dtype = DTYPE_CLUSTER_MSC
fields = FIELDS_CLUSTER_MSC
sh = 0
elif fmt == FMT_EDAC:
dtype = DTYPE_CLUSTER_EDAC
fields = FIELDS_CLUSTER_EDAC
sh = 1
elif fmt == FMT_XYZ:
dtype = DTYPE_CLUSTER_XYZ
fields = FIELDS_CLUSTER_XYZ
sh = 2
else:
dtype = DTYPE_CLUSTER_XYZ
fields = FIELDS_CLUSTER_XYZ
sh = 2
data = np.genfromtxt(f, dtype=dtype, skip_header=sh)
self.data = np.empty(data.shape, dtype=self.dtype)
self.data['x'] = data['x']
self.data['y'] = data['y']
self.data['z'] = data['z']
if 'i' in fields:
self.data['i'] = data['i']
else:
self.update_index()
if 't' in fields:
self.data['t'] = data['t']
if 's' in fields:
self.data['s'] = data['s']
else:
self.update_symbols()
if 't' not in fields:
self.update_atomtypes()
if 'e' in fields:
self.data['e'] = data['e']
else:
self.data['e'] = 0
pos = self.positions()
# note: np.linalg.norm does not accept axis argument in version 1.7
# (check np.version.version)
norm = np.sqrt(np.sum(pos**2, axis=1))
self.rmax = np.max(norm)
def update_symbols(self):
"""
update element symbols from element numbers.
if you have modified the element numbers in the self.data array directly,
this method updates the symbol column to make the data consistent.
"""
for atom in self.data:
atom['s'] = pt.elements[atom['t']].symbol
def update_atomtypes(self):
"""
update element numbers from element symbols.
if you have modified the element symbols in the self.data array directly,
this method updates the atom type column to make the data consistent.
"""
for atom in self.data:
atom['t'] = pt.elements.symbol(atom['s'].strip()).number
def update_index(self):
"""
update the index column.
if you have modified the order or number of elements in the self.data array directly,
you may need to re-index the atoms if your code uses functions that rely on the index.
@return: None
"""
self.data['i'] = np.arange(1, self.data.shape[0] + 1)
def save_to_file(self, f, fmt=FMT_DEFAULT, comment=""):
"""
save the cluster to a file which can be read by the scattering program.
the method updates the atom index because some file formats require an index column.
@param f: (string/handle) path name or open file handle of the cluster file.
@param fmt: (int) file format.
must be one of the FMT_ constants.
if FMT_DEFAULT, self.file_format is used.
@param comment: (str) comment line (second line) in XYZ file.
not used in other file formats.
by default, self.comment is used.
@remark if the filename ends in .gz, the file is saved in compressed gzip format
"""
if fmt == FMT_DEFAULT:
fmt = self.file_format
if not comment:
comment = self.comment
if fmt == FMT_MSC:
file_format = FMT_CLUSTER_MSC
fields = FIELDS_CLUSTER_MSC
header = ""
elif fmt == FMT_EDAC:
file_format = FMT_CLUSTER_EDAC
fields = FIELDS_CLUSTER_EDAC
header = "%u l(A)" % (self.data.shape[0])
elif fmt == FMT_XYZ:
file_format = FMT_CLUSTER_XYZ
fields = FIELDS_CLUSTER_XYZ
header = "{0}\n{1}".format(self.data.shape[0], comment)
else:
file_format = FMT_CLUSTER_XYZ
fields = FIELDS_CLUSTER_XYZ
header = "{0}\n{1}".format(self.data.shape[0], comment)
self.update_index()
data = self.data[fields]
np.savetxt(f, data, fmt=file_format, header=header, comments="")