#!/usr/bin/env python
"""
@package pmsco.cluster
cluster building and handling

the Cluster class is provided to facilitate the construction and import/export of clusters.
a cluster can be built by adding single atoms, layers, or a half-space bulk lattice.
the class can import from/export to various file formats.
XYZ allows for export to 3D visualizers, e.g. Avogadro.

the module has a command line interface to convert cluster files.

@pre requires the periodictable package (https://pypi.python.org/pypi/periodictable)
@code{.sh}
pip install --user periodictable
@endcode

@author Matthias Muntwiler

@copyright (c) 2015-20 by Paul Scherrer Institut @n
Licensed under the Apache License, Version 2.0 (the "License"); @n
  you may not use this file except in compliance with the License.
  You may obtain a copy of the License at
  http://www.apache.org/licenses/LICENSE-2.0
"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import math
import numpy as np
import periodictable as pt
import sys

## default file format identifier
FMT_DEFAULT = 0
## MSC file format identifier
FMT_MSC = 1
## EDAC file format identifier
FMT_EDAC = 2
## XYZ file format identifier
FMT_XYZ = 3
## PHAGEN output file format identifier
FMT_PHAGEN_OUT = 4
## PHAGEN input file format identifier
FMT_PHAGEN_IN = 5
## native file format identifier
FMT_PMSCO = 6

# python version dependent type of chemical symbol
if sys.version_info[0] >= 3:
    _SYMBOL_TYPE = 'U2'
else:
    _SYMBOL_TYPE = 'S2'

## numpy.array datatype of internal Cluster.data array
DTYPE_CLUSTER_INTERNAL = [('i', 'i4'), ('t', 'i4'), ('s', _SYMBOL_TYPE), ('c', 'i4'),
                          ('x', 'f4'), ('y', 'f4'), ('z', 'f4'), ('e', 'u1'), ('q', 'f4')]
## string formatting of native file format
FMT_CLUSTER_INTERNAL = ["%5u", "%2u", "%s", "%5u", "%7.3f", "%7.3f", "%7.3f", "%1u", "%7.3f"]
## field (column) names of native file format
FIELDS_CLUSTER_INTERNAL = ['i', 't', 's', 'c', 'x', 'y', 'z', 'e', 'q']
## column names of native file format
NAMES_CLUSTER_INTERNAL = {'i': 'index', 't': 'element', 's': 'symbol', 'c': 'class', 'x': 'x', 'y': 'y', 'z': 'z',
                          'e': 'emitter', 'q': 'charge'}

## numpy.array datatype of cluster for MSC cluster file input/output
DTYPE_CLUSTER_MSC = [('i', 'i4'), ('x', 'f4'), ('y', 'f4'), ('z', 'f4'), ('t', 'i4')]
## file format of MSC cluster file
FMT_CLUSTER_MSC = ["%5u", "%7.3f", "%7.3f", "%7.3f", "%2u"]
## field (column) names of MSC cluster file
FIELDS_CLUSTER_MSC = ['i', 'x', 'y', 'z', 't']

## numpy.array datatype of cluster for EDAC cluster file input/output
DTYPE_CLUSTER_EDAC= [('i', 'i4'), ('c', 'i4'), ('x', 'f4'), ('y', 'f4'), ('z', 'f4')]
## file format of EDAC cluster file
FMT_CLUSTER_EDAC = ["%5u", "%2u", "%7.3f", "%7.3f", "%7.3f"]
## field (column) names of EDAC cluster file
FIELDS_CLUSTER_EDAC = ['i', 'c', 'x', 'y', 'z']

## numpy.array datatype of cluster for XYZ file input/output
DTYPE_CLUSTER_XYZ= [('s', _SYMBOL_TYPE), ('x', 'f4'), ('y', 'f4'), ('z', 'f4')]
## file format of XYZ cluster file
FMT_CLUSTER_XYZ = ["%s", "%10.5f", "%10.5f", "%10.5f"]
## field (column) names of XYZ cluster file
FIELDS_CLUSTER_XYZ = ['s', 'x', 'y', 'z']

## numpy.array datatype of cluster for PHAGEN output file input/output
DTYPE_CLUSTER_PHAGEN_OUT = [('i', 'i4'), ('s', _SYMBOL_TYPE), ('t', 'i4'), ('x', 'f4'), ('y', 'f4'), ('z', 'f4'), ('c', 'i4')]
## file format of PHAGEN cluster output file
FMT_CLUSTER_PHAGEN_OUT = ["%5u", "%s", "%2u", "%7.3f", "%7.3f", "%7.3f", "%5u"]
## field (column) names of PHAGEN cluster output file
FIELDS_CLUSTER_PHAGEN_OUT = ['i', 's', 't', 'x', 'y', 'z', 'c']

## numpy.array datatype of cluster for PHAGEN input file input/output
DTYPE_CLUSTER_PHAGEN_IN = [('s', _SYMBOL_TYPE), ('t', 'i4'), ('x', 'f4'), ('y', 'f4'), ('z', 'f4'), ('q', 'f4')]
## file format of PHAGEN input file, cluster section
FMT_CLUSTER_PHAGEN_IN = ["%s", "%2u", "%7.3f", "%7.3f", "%7.3f", "%7.3f"]
## field (column) names of PHAGEN input file, cluster section
FIELDS_CLUSTER_PHAGEN_IN = ['s', 't', 'x', 'y', 'z', 'q']

## dictionary of supported cluster data types
CLUSTER_DTYPES = {FMT_DEFAULT: DTYPE_CLUSTER_INTERNAL,
                  FMT_MSC: DTYPE_CLUSTER_MSC,
                  FMT_EDAC: DTYPE_CLUSTER_EDAC,
                  FMT_XYZ: DTYPE_CLUSTER_XYZ,
                  FMT_PHAGEN_OUT: DTYPE_CLUSTER_PHAGEN_OUT,
                  FMT_PHAGEN_IN: DTYPE_CLUSTER_PHAGEN_IN}

## dictionary of supported cluster file formats
CLUSTER_FMTS = {FMT_DEFAULT: FMT_CLUSTER_INTERNAL,
                FMT_MSC: FMT_CLUSTER_MSC,
                FMT_EDAC: FMT_CLUSTER_EDAC,
                FMT_XYZ: FMT_CLUSTER_XYZ,
                FMT_PHAGEN_OUT: FMT_CLUSTER_PHAGEN_OUT,
                FMT_PHAGEN_IN: FMT_CLUSTER_PHAGEN_IN}

## dictionary of supported cluster field names
CLUSTER_FIELDS = {FMT_DEFAULT: FIELDS_CLUSTER_INTERNAL,
                  FMT_MSC: FIELDS_CLUSTER_MSC,
                  FMT_EDAC: FIELDS_CLUSTER_EDAC,
                  FMT_XYZ: FIELDS_CLUSTER_XYZ,
                  FMT_PHAGEN_OUT: FIELDS_CLUSTER_PHAGEN_OUT,
                  FMT_PHAGEN_IN: FIELDS_CLUSTER_PHAGEN_IN}


class Cluster(object):
    """
    Represents a cluster of atoms by their coordinates and chemical element.
    
    the object stores the following information per atom in the @ref data array:
    
    - sequential atom index (1-based)
    - atom type (chemical element number)
    - chemical element symbol
    - x coordinate of the atom position
    - t coordinate of the atom position
    - z coordinate of the atom position
    - emitter flag
    - charge/ionicity
    - scatterer class
    
    the class also defines methods that add or manipulate atoms of the cluster.
    see most importantly the set_rmax, add_atom, add_layer and add_bulk functions.
    emitters can be flagged by the set_emitter method.
    
    you may also manipulate the data array directly.
    in this case, be sure to keep the data array consistent.
    the update methods can help to recreate the index, atom type or symbol columns.
    
    the class can also load and save files in some simple formats.
    """

    ## @var rmax
    #       maximum distance of atoms from the origin.
    #
    #       float, default = 0
    #
    #       this parameter restricts the addition of new atoms.
    #       changing the parameter does not affect existing atoms.
    #       the default is 0 (no atom will be added!).
    #       you must set this parameter explicitly!

    ##    @var dtype
    #       data type of the internal numpy.ndarray.
    
    ##    @var file_format
    #       default file format.
    #
    #       must be one of the FMT_MSC, FMT_EDAC, FMT_XYZ constants.
    #       the initial value is FMT_XYZ.
    
    ##    @var data
    #       structured numpy array holding the atom positions.
    #
    #       the columns of the array are:
    #       @arg @c 'i' (int) atom index (1-based)
    #       @arg @c 't' (int) atom type (chemical element number)
    #       @arg @c 's' (string) chemical element symbol
    #       @arg @c 'c' (int) scatterer class
    #       @arg @c 'x' (float32) x coordinate of the atom position
    #       @arg @c 'y' (float32) t coordinate of the atom position
    #       @arg @c 'z' (float32) z coordinate of the atom position
    #       @arg @c 'e' (uint8)   1 = emitter, 0 = regular atom
    #       @arg @c 'q' (float32) charge/ionicity

    ##  @var comment (str)
    #   one-line comment that can be included in some cluster files

    def __init__(self):
        self.data = None
        self.rmax = 0.0
        self.dtype = DTYPE_CLUSTER_INTERNAL
        self.file_format = FMT_XYZ
        self.comment = ""
        self.clear()

    def clear(self):
        """
        Remove all atoms from the cluster.
        """
        n_atoms = 0
        self.data = np.zeros(n_atoms, dtype=self.dtype)

    def copy_from(self, cluster):
        """
        Copy the data from another cluster.

        @param cluster: (Cluster) other Cluster object.
        """
        self.data = cluster.data.copy()
        self.rmax = cluster.rmax
        self.dtype = cluster.dtype
        self.comment = cluster.comment

    def set_rmax(self, r):
        """
        set rmax, the maximum distance of atoms from the origin.
        
        atoms with norm greater than rmax will not be added to the cluster
        by the add_layer() and add_bulk() methods.
        existing atoms are not affected when changing rmax.

        you must set this parameter explicitly, as the default value is 0
        (no atom will be added)!
        """
        self.rmax = r

    def build_element(self, index, element_number, x, y, z, emitter, charge=0., scatterer_class=0):
        """
        build a tuple in the format of the internal data array.
        
        @param index: (int) index
        
        @param element_number: (int) chemical element number
        
        @param x, y, z: (float) atom coordinates in the cluster
        
        @param emitter: (int or bool) True = emitter, False = scatterer

        @param charge: (float) ionicity. default = 0

        @param scatterer_class: (int) scatterer class. default = 0.
        """
        symbol = pt.elements[element_number].symbol
        element = (index, element_number, symbol, scatterer_class, x, y, z, int(emitter), charge)
        return element

    def add_atom(self, atomtype, v_pos, is_emitter=False, charge=0.):
        """
        add a single atom to the cluster.
        
        @param atomtype: (int) chemical element number
        
        @param v_pos: (numpy.ndarray, shape = (3)) position vector
        
        @param is_emitter: (int or bool) True = emitter, False = scatterer

        @param charge: (float) ionicity. default = 0

        @return array index of added atom
        """
        n0 = self.data.shape[0] + 1
        element = self.build_element(n0, atomtype, v_pos[0], v_pos[1], v_pos[2], is_emitter, charge)
        self.data = np.append(self.data, np.array(element, dtype=self.data.dtype))
        return n0 - 1

    def add_layer(self, atomtype, v_pos, v_lat1, v_lat2):
        """
        add a layer of atoms to the cluster.
        
        the layer is expanded up to the limit given by
        self.rmax (maximum distance from the origin).
        all atoms are non-emitters.
        
        @param atomtype: (int) chemical element number
        
        @param v_pos: (numpy.ndarray, shape = (3))
            position vector of the first atom (basis vector)
        
        @param v_lat1, v_lat2: (numpy.ndarray, shape = (3))
            lattice vectors.
        """
        r_great = max(self.rmax, np.linalg.norm(v_pos))
        n0 = self.data.shape[0] + 1
        n1 = max(int(r_great / np.linalg.norm(v_lat1)) + 1, 4) * 3
        n2 = max(int(r_great / np.linalg.norm(v_lat2)) + 1, 4) * 3
        idx = np.mgrid[-n1:n1+1, -n2:n2+1]
        idx = idx.reshape(idx.shape[0], -1)
        lat = np.array([v_lat1, v_lat2])
        v = v_pos + np.matmul(idx.T, lat)
        rsq = np.sum(np.square(v), axis=-1)
        b1 = rsq <= self.rmax**2
        sel = b1.nonzero()[0]
        buf = np.empty((len(sel)), dtype=self.dtype)
        for nn, ii in enumerate(sel):
            buf[nn] = self.build_element(nn + n0, atomtype, v[ii, 0], v[ii, 1], v[ii, 2], 0)
        self.data = np.append(self.data, buf)

    def add_bulk(self, atomtype, v_pos, v_lat1, v_lat2, v_lat3, z_surf=0.0):
        """
        add bulk atoms to the cluster.
        
        the lattice is expanded up to the limits given by 
        self.rmax (maximum distance from the origin)
        and z_surf (position of the surface).
        all atoms are non-emitters.

        @param atomtype: (int) chemical element number
        
        @param v_pos: (numpy.ndarray, shape = (3))
            position vector of the first atom (basis vector)
        
        @param v_lat1, v_lat2, v_lat3: (numpy.ndarray, shape = (3))
            lattice vectors.

        @param z_surf: (float) position of surface.
            atoms with z > z_surf are not added.
        """
        r_great = max(self.rmax, np.linalg.norm(v_pos))
        n0 = self.data.shape[0] + 1
        n1 = max(int(r_great / np.linalg.norm(v_lat1)) + 1, 4) * 3
        n2 = max(int(r_great / np.linalg.norm(v_lat2)) + 1, 4) * 3
        n3 = max(int(r_great / np.linalg.norm(v_lat3)) + 1, 4) * 3
        idx = np.mgrid[-n1:n1+1, -n2:n2+1, -n3:n3+1]
        idx = idx.reshape(idx.shape[0], -1)
        lat = np.array([v_lat1, v_lat2, v_lat3])
        v = v_pos + np.matmul(idx.T, lat)
        rsq = np.sum(np.square(v), axis=-1)
        b1 = rsq <= self.rmax**2
        b2 = v[:, 2] <= z_surf
        ba = np.all([b1, b2], axis=0)
        sel = ba.nonzero()[0]
        buf = np.empty((len(sel)), dtype=self.dtype)
        for nn, ii in enumerate(sel):
            buf[nn] = self.build_element(nn + n0, atomtype, v[ii, 0], v[ii, 1], v[ii, 2], 0)
        self.data = np.append(self.data, buf)

    def add_cluster(self, cluster, check_rmax=False, check_unique=False, tol=0.001):
        """
        add atoms from another cluster object.
        
        @note the order of atoms in the internal data array may change during this operation.
            the atom index is updated.
            
        @param cluster: Cluster object to be added.
         
        @param check_rmax: if True, atoms outside self.rmax are not added.
            if False (default), all atoms of the other cluster are added.
        
        @param check_unique: if True, atoms occupying the same position as an existing atom will not be added.
            if False (default), all atoms are added even if they occupy the same position.
            
        @param tol: tolerance for checking uniqueness.
            positions of two atoms are considered equal if all coordinates lie within the tolerance interval.

        @return: None 
        """
        assert isinstance(cluster, Cluster)
        data = self.data.copy()
        source = cluster.data.copy()

        if check_rmax and source.shape[0] > 0:
            source_xyz = cluster.get_positions()
            b_rmax = np.linalg.norm(source_xyz, axis=1) <= self.rmax
            idx = np.where(b_rmax)
            source = source[idx]
        data = np.append(data, source)

        if check_unique and data.shape[0] > 0:
            data_xyz = np.empty((data.shape[0], 3))
            data_xyz[:, 0] = data['x']
            data_xyz[:, 1] = data['y']
            data_xyz[:, 2] = data['z']
            tol *= 2
            uni_xyz = np.round(data_xyz / tol)
            # this requires numpy 1.13 or later
            _, idx = np.unique(uni_xyz, return_index=True, axis=0)
            data = data[np.sort(idx)]

        self.data = data
        self.update_index()

    def get_z_layers(self, tol=0.001):
        """
        return the z-coordinates of atomic layers.
        the layers are stacked in the z-direction.

        the function gathers unique z-coordinates.
        coordinates which are within the given tolerance are assigned to the same layer.

        @param tol: tolerance
        @return: (numpy.ndarray) z-coordinates of the layers.
            the coordinates are numerically ordered, the top layer appears last.
            the returned coordinates may not be identical to any atom coordinate of a layer
            but deviate up to the given tolerance.
        """
        tol *= 2
        self_z = np.empty(self.data.shape, np.float32)
        self_z[:] = self.data['z']
        z2 = np.round(self_z / tol)
        layers = np.unique(z2) * tol
        return layers

    def relax(self, z_cut, z_shift, element=0):
        """
        shift atoms below a certain z coordinate by a fixed distance in the z direction.

        @param z_cut: atoms below this z coordinate are shifted.
        @param z_shift: amount of shift in z direction
            (positive to move towards the surface, negative to move into the bulk).
        @param element: (int) chemical element number if atoms of a specific element should be affected.
            by default (element = 0), all atoms are moved.
        @return: (numpy.ndarray) indices of the atoms that have been shifted.
        """
        self_z = np.empty(self.data.shape, np.float32)
        self_z[:] = self.data['z']
        b_z = self_z <= z_cut
        b_all = b_z

        if element:
            try:
                b_el = self.data['t'] == int(element)
            except ValueError:
                b_el = self.data['s'] == element
            b_all = np.all([b_z, b_el], axis=0)

        idx = np.where(b_all)
        self.data['z'][idx] += z_shift

        return idx[0]

    def get_center(self, element=None):
        """
        get the geometric center of the cluster or a class of atoms.

        @param element: chemical element number (int) or symbol (str)
            if atoms of a specific element should be considered only.
            by default (element == None or 0 or ""),
            all atoms are included in the calculation.

        @return: (numpy.ndarray) 3-dimensional vector.
        """

        if element:
            try:
                sel = self.data['t'] == int(element)
            except ValueError:
                sel = self.data['s'] == element
        else:
            sel = np.ones_like(self.data['t'])
        idx = np.where(sel)
        center = np.zeros(3)
        center[0] = np.mean(self.data['x'][idx])
        center[1] = np.mean(self.data['y'][idx])
        center[2] = np.mean(self.data['z'][idx])
        return center

    def translate(self, vector, element=None):
        """
        translate the cluster or all atoms of a specified element.

        translation shifts each selected atom by the given vector.

        @param vector: (numpy.ndarray) 3-dimensional displacement vector.

        @param element: chemical element number (int) or symbol (str)
            if atoms of a specific element should be affected only.
            by default (element == None or 0 or ""),
            all atoms are translated.

        @return: (numpy.ndarray) indices of the atoms that have been shifted.
        """
        if element:
            try:
                sel = self.data['t'] == int(element)
            except ValueError:
                sel = self.data['s'] == element
        else:
            sel = np.ones_like(self.data['t'])
        idx = np.where(sel)
        self.data['x'][idx] += vector[0]
        self.data['y'][idx] += vector[1]
        self.data['z'][idx] += vector[2]

        return idx[0]

    def matrix_transform(self, matrix):
        """
        apply a transformation matrix to each atom of the cluster.
        
        the transformed atom positions are calculated as v = R * transpose(v)
        
        @param matrix: transformation matrix
        
        @return: None 
        """
        pos = np.empty((3, self.data.shape[0]), np.float32)
        pos[0, :] = self.data['x']
        pos[1, :] = self.data['y']
        pos[2, :] = self.data['z']
        pos = np.matmul(matrix, pos)
        self.data['x'] = pos[0, :]
        self.data['y'] = pos[1, :]
        self.data['z'] = pos[2, :]

    def rotate_x(self, angle):
        """
        rotate cluster about the x-axis

        @param angle (float) in degrees
        """
        angle = math.radians(angle)
        s = math.sin(angle)
        c = math.cos(angle)
        matrix = np.array([[1, 0, 0], [0, c, -s], [0, s, c]])
        self.matrix_transform(matrix)

    def rotate_y(self, angle):
        """
        rotate cluster about the y-axis

        @param angle (float) in degrees
        """
        angle = math.radians(angle)
        s = math.sin(angle)
        c = math.cos(angle)
        matrix = np.array([[c, 0, s], [0, 1, 0], [-s, 0, c]])
        self.matrix_transform(matrix)

    def rotate_z(self, angle):
        """
        rotate cluster about the z-axis (surface normal)

        @param angle (float) in degrees
        """
        angle = math.radians(angle)
        s = math.sin(angle)
        c = math.cos(angle)
        matrix = np.array([[c, -s, 0], [s, c, 0], [0, 0, 1]])
        self.matrix_transform(matrix)

    def find_positions(self, pos, tol=0.001):
        """
        find all atoms which occupy a given position.

        @param pos: position vector.
            this can be a numpy.ndarray with shape (3)
            or any type where pos[0] represents the x-coordinate, pos[1] y, and pos[2] z.
        
        @param tol: (float) matching tolerance per coordinate.

        @return numpy.array of indices which match v_pos.
        """
        if isinstance(pos, np.ndarray):
            assert pos.shape == (3,)
        else:
            pos = np.array((pos[0], pos[1], pos[2]))
        b2 = np.abs(pos - self.get_positions()) < tol
        b1 = np.all(b2, axis=1)
        idx = np.where(b1)
        return idx[0]

    def find_index_cylinder(self, pos, r_xy, r_z, element):
        """
        find atoms of a given element within a cylindrical volume and return their indices.

        @param pos: (numpy.array, shape = (3)) center position of the cylinder.

        @param r_xy: (float) radius of the cylinder.
            returned atoms must match |atom(x,y) - pos(x,y)| <= r_xy.

        @param r_z: (float) half height of the cylinder.
            returned atoms must match |atom(z) - pos(z)| <= r_z.

        @param element: (str or int) element symbol or atomic number.
            if None, the element is not checked.

        @return numpy.array of indices which match v_pos.
        """
        pos_xy = pos[0:2]
        self_xy = np.empty((self.data.shape[0], 2), np.float32)
        self_xy[:, 0] = self.data['x']
        self_xy[:, 1] = self.data['y']
        b_xy = np.linalg.norm(self_xy - pos_xy, axis=1) <= r_xy

        pos_z = pos[2]
        self_z = self.data['z']
        b_z = np.abs(self_z - pos_z) <= r_z

        if element is not None:
            try:
                b_el = self.data['t'] == int(element)
            except ValueError:
                b_el = self.data['s'] == element
            b_all = np.all([b_xy, b_z, b_el], axis=0)
        else:
            b_all = np.all([b_xy, b_z], axis=0)

        idx = np.where(b_all)
        return idx[0]

    def trim_cylinder(self, r_xy, r_z):
        """
        remove atoms outside a given cylinder.

        the cylinder is centered at the origin.

        @param r_xy: (float) radius of the cylinder.
            atoms to keep must match |atom(x,y)| <= r_xy.

        @param r_z: (float) half height of the cylinder.
            atoms to keep must match |atom(z)| <= r_z.

        @return: None
        """
        self_xy = np.empty((self.data.shape[0], 2), np.float32)
        self_xy[:, 0] = self.data['x']
        self_xy[:, 1] = self.data['y']
        b_xy = np.linalg.norm(self_xy, axis=1) <= r_xy

        self_z = self.data['z']
        b_z = np.abs(self_z) <= r_z

        b_all = np.all([b_xy, b_z], axis=0)
        idx = np.where(b_all)
        self.data = self.data[idx]
        self.update_index()

    def trim_paraboloid(self, rxy, z0):
        """
        remove atoms outside a given paraboloid.

        the paraboloid is defined by z(r) = z0 * (1 - r**2 / rxy**2),
        where r**2 = x**2 + y**2.
        its apex is at (0, 0, z0).
        z(r) = 0 for x**2 + y**2 = rxy**2.

        the coordinates (x,y,z) of atoms to keep must match z >= z(r).

        @param rxy: (float) radius of the paraboloid at z = 0.

        @param z0: (float) vertical coordinate of the apex (at x = y = 0).
            in the usual cluster layout where the surface is near z = 0 and the atoms at lower z,
            this value is negative.

        @return: None
        """
        rsq = self.data['x']**2 + self.data['y']**2
        pz = z0 * (1. - rsq / rxy**2)
        idx = np.where(self.data['z'] >= pz)
        self.data = self.data[idx]
        self.update_index()

    def trim_sphere(self, radius):
        """
        remove atoms outside a given sphere.

        the sphere is centered at the origin.

        @param radius: (float) radius of the sphere.
            atoms to keep must match |atom(x,y,z)| <= radius.

        @return: None
        """
        self_xyz = self.get_positions()
        b_xyz = np.linalg.norm(self_xyz, axis=1) <= radius
        idx = np.where(b_xyz)
        self.data = self.data[idx]
        self.update_index()

    def trim_slab(self, axis, center, depth):
        """
        remove atoms outside a slab that is parallel to one of the coordinate planes.

        @param axis: axis to trim: 'x', 'y' or 'z'.
        @param center: center position of the slab.
        @param depth: thickness of the slab.

        @return: None
        """
        coord = np.empty(self.data.shape, np.float32)
        coord[:] = self.data[axis]
        sel = np.abs(coord - center) <= depth / 2
        idx = np.where(sel)
        self.data = self.data[idx]
        self.update_index()

    def set_emitter(self, pos=None, idx=-1, tol=0.001):
        """
        select an atom as emitter.
        
        the emitter atom can be specified by position or index.
        either one of the pos or idx arguments must be specified.

        @param idx: (int) array index of the atom.
        
        @param pos: (numpy.array, shape = (3)) position vector.
        
        @param tol: (float) matching tolerance per component if pos argument is used.

        @raise IndexError if the position cannot be found
        """
        if pos is not None:
            ares = self.find_positions(pos, tol)
            idx = ares[0]
        item = self.data[idx]
        item['e'] = 1
        
    def move_to_first(self, pos=None, idx=0, tol=0.001):
        """
        move an atom to the first position.
        
        the emitter atom can be specified by position or index.
        either one of the pos or idx arguments must be specified.

        @param idx: (int) array index of the atom.
            must be greater than 1 to have an effect.
        
        @param pos: (numpy.array, shape = (3)) position vector.
        
        @param tol: (float) matching tolerance per component if pos argument is used.

        @raise IndexError if the position cannot be found
        """

        if pos is not None:
            ares = self.find_positions(pos, tol)
            idx = ares[0]
        if idx:
            em = self.data[idx]
            self.data = np.delete(self.data, idx)
            self.data = np.insert(self.data, 0, em)
            self.update_index()

    def get_positions(self):
        """
        get the atom coordinates in a two-dimensional array.

        the returned array is an independent copy of the original data.
        changes will not affect the original cluster.
        
        @return numpy.ndarray, shape = (N,3)
        """
        pos = np.empty((self.data.shape[0], 3), np.float32)
        pos[:, 0] = self.data['x']
        pos[:, 1] = self.data['y']
        pos[:, 2] = self.data['z']
        return pos

    def set_positions(self, positions):
        """
        set atom coordinates from an array of shape (N,3).
        
        this method can be used on a modified array obtained from get_positions.
        N must be the number of atoms defined in the cluster.
        
        @param positions: numpy.ndarray of shape (N,3) where N is the number of atoms in this cluster.
         
        @return: None
        
        @raise AssertionError if the array sizes do not match.
        """
        assert isinstance(positions, np.ndarray)
        assert positions.shape == (self.data.shape[0], 3)
        self.data['x'] = positions[:, 0]
        self.data['y'] = positions[:, 1]
        self.data['z'] = positions[:, 2]

    def get_position(self, index):
        """
        get the position of a single atom.
        
        @param index: (int) index of the atom.
        
        @return numpy.array, shape = (3): position vector.
            the array instance is independent from the original array.
        """
        rec = self.data[index]
        return np.array((rec['x'], rec['y'], rec['z']))

    def get_atom_count(self):
        """
        get the number of atoms (positions) in the cluster.

        @return the number of atoms in the cluster.
        """
        return self.data.shape[0]

    def get_atomtype(self, index):
        """
        get the chemical element number of an atom.
        
        @param index: (int) index of the atom.
        
        @return int: chemical element number.
        """
        rec = self.data[index]
        return rec['t']

    def get_symbol(self, index):
        """
        get the chemical element symbol of an atom.
        
        @param index: (int) index of the atom.
        
        @return string: chemical element symbol.
        """
        rec = self.data[index]
        return rec['s']

    def get_emitters(self, fields):
        """
        get a list of all emitters.

        @param fields: list of field (column) names to return

        @return list of tuples. each tuple contains the values of the requested fields.
        """
        idx = self.data['e'] != 0
        ems = self.data[fields][idx]
        return [tuple(em) for em in ems]

    def get_emitter_count(self):
        """
        get the number of emitters in the cluster.

        @return the number of atoms marked as emitter.
        """
        idx = self.data['e'] != 0
        return np.sum(idx)

    def calc_scattering_angles(self, index_emitter, radius):
        """
        calculate forward-scattering angles of the cluster atoms

        for each atom within a given radius of the emitter,
        the connecting vector between emitter and scatterer is calculated
        and returned in cartesian and polar coordinates.

        @param index_emitter: atom index of the emitter.
            all angles are calculated with respect to this atom.

        @param radius: include only atoms within this radius of the emitter.

        @note back-scattering angles can be obtained by inverting the angle on the unit sphere:
            th' = 180 - th, ph' = -ph.

        @return dictionary with results.
            each item is a numpy.ndarray of shape (N, M)
            where N is the number of scatterers
            and M = 3 for dict['xyz'] and M = 1 otherwise.
            @arg dict['index']: atom index into the cluster array.
            @arg dict['xyz']: connecting vector between the emitter and the atom in cartesian coordinates.
            @arg dict['dist']: distance between the emitter and the atom.
            @arg dict['polar']: polar angle with respect to the z-axis.
            @arg dict['azimuth']: azimuthal angle with respect to the x-axis.
        """
        # position of emitter atom
        em = self.data[index_emitter]
        em = np.asarray((em['x'], em['y'], em['z']))

        # relative positions of scattering atoms
        xyz = self.get_positions()
        xyz -= em
        dist = np.linalg.norm(xyz, axis=1)
        sel1 = dist <= radius
        sel2 = dist > 0.
        idx = np.where(np.all([sel1, sel2], axis=0))
        xyz = xyz[idx]
        dist = dist[idx]

        # angles
        v1 = np.asarray([0, 0, 1])
        v2 = np.transpose(xyz / dist.reshape((dist.shape[0], 1)))
        th = np.degrees(np.arccos(np.clip(np.dot(v1, v2), -1., 1.)))
        ph = np.degrees(np.arctan2(v2[1], v2[0]))
        return {'index': idx[0], 'xyz': xyz, 'dist': dist, 'polar': th, 'azimuth': ph}

    def load_from_file(self, f, fmt=FMT_DEFAULT):
        """
        load a cluster from a file created by the scattering program.

        the file formats differ in the columns that they contain.
        only the 'x', 'y', 'z' coordinates are common to all formats.
        at least one of the 's' and 't' columns must be present.
        missing columns are initialized as follows.

        @arg 'i': reset to a 1-based sequential index (@ref update_index).
        @arg 's': derived from the 't' column (@ref update_symbols).
        @arg 't': derived from the 's' column (@ref update_atomtypes).
        @arg 'e': set to 0.
        @arg 'c': set equal to the 't' column (@ref init_atomclasses).
        @arg 'q': set to 0.
        
        @param f: path name or open file handle of the cluster file.
        
        @param fmt: file format.
            must be one of the FMT_ constants.
            if FMT_DEFAULT, self.file_format is used.
        
        @remark if the filename ends in .gz, the file is loaded from compressed gzip format
        """
        if fmt == FMT_DEFAULT:
            fmt = self.file_format

        if fmt == FMT_MSC:
            dtype = DTYPE_CLUSTER_MSC
            fields = FIELDS_CLUSTER_MSC
            sh = 0
        elif fmt == FMT_EDAC:
            dtype = DTYPE_CLUSTER_EDAC
            fields = FIELDS_CLUSTER_EDAC
            sh = 1
        elif fmt == FMT_XYZ:
            dtype = DTYPE_CLUSTER_XYZ
            fields = FIELDS_CLUSTER_XYZ
            sh = 2
        elif fmt == FMT_PHAGEN_OUT:
            dtype = DTYPE_CLUSTER_PHAGEN_OUT
            fields = FIELDS_CLUSTER_PHAGEN_OUT
            sh = 1
        elif fmt == FMT_PHAGEN_IN:
            dtype = DTYPE_CLUSTER_PHAGEN_IN
            fields = FIELDS_CLUSTER_PHAGEN_IN
            sh = 0
        elif fmt == FMT_PMSCO:
            dtype = DTYPE_CLUSTER_INTERNAL
            fields = FIELDS_CLUSTER_INTERNAL
            sh = 1
        else:
            raise ValueError("unknown file format {}".format(fmt))

        data = np.atleast_1d(np.genfromtxt(f, dtype=dtype, skip_header=sh))
        if fmt == FMT_PHAGEN_IN and data['t'][-1] < 1:
            data = data[:-1]

        self.data = np.empty(data.shape, dtype=self.dtype)
        self.data['x'] = data['x']
        self.data['y'] = data['y']
        self.data['z'] = data['z']
        if 'i' in fields:
            self.data['i'] = data['i']
        else:
            self.update_index()
        if 't' in fields:
            self.data['t'] = data['t']
        if 's' in fields:
            self.data['s'] = data['s']
        elif 't' in fields:
            self.update_symbols()
        if 't' not in fields:
            if 's' in fields:
                self.update_atomtypes()
        if 'e' in fields:
            self.data['e'] = data['e']
        else:
            self.data['e'] = 0
        if 'c' in fields:
            self.data['c'] = data['c']
        else:
            self.data['c'] = 0
        if 'q' in fields:
            self.data['q'] = data['q']
        else:
            self.data['q'] = 0.

        pos = self.get_positions()
        # note: np.linalg.norm does not accept axis argument in version 1.7
        # (check np.version.version)
        norm = np.sqrt(np.sum(pos**2, axis=1))
        self.rmax = np.max(norm)

    def update_symbols(self):
        """
        update element symbols from element numbers.
        
        if you have modified the element numbers in the self.data array directly,
        this method updates the symbol column to make the data consistent.
        """
        for atom in self.data:
            atom['s'] = pt.elements[atom['t']].symbol

    def update_atomtypes(self):
        """
        update element numbers from element symbols.
        
        if you have modified the element symbols in the self.data array directly,
        this method updates the atom type column to make the data consistent.
        """
        for atom in self.data:
            atom['t'] = pt.elements.symbol(atom['s'].strip()).number

    def init_atomclasses(self, field_or_value='t', default_only=False):
        """
        initialize atom classes from atom types.

        atom classes identify the atomic scattering potential or scattering factors
        to be used in the multiple scattering program.

        if the scattering factors are calculated in the PMSCO process (by EDAC or PHAGEN),
        the atom classes must be set equal to the element type
        or left at the default value 0 in which case PMSCO sets the correct values.

        if the scattering factors are loaded from existing files,
        the atom class corresponds to the key of the pmsco.project.CalculatorParams.phase_files dictionary.
        in this case the meaning of the class value is up to the project,
        and the class must be set either by the cluster generator
        or the project's after_atomic_scattering hook.

        @param field_or_value: name of a cluster data field, e.g. 't', or an integer constant.

        @param default_only: initialize classes only if they are at their default value (0).

        @return None
        """
        if not default_only or np.sum(np.abs(self.data['c'])) == 0:
            if isinstance(field_or_value, str):
                self.data['c'] = self.data[field_or_value]
            else:
                self.data['c'] = field_or_value

    def update_index(self):
        """
        update the index column.
        
        if you have modified the order or number of elements in the self.data array directly,
        you may need to re-index the atoms if your code uses functions that rely on the index. 
        
        @return None
        """
        self.data['i'] = np.arange(1, self.data.shape[0] + 1)

    def update_atoms(self, clu, fields):
        """
        update atom properties from another cluster.

        this method copies selected fields from another cluster.
        the other cluster must contain the same atoms (same coordinates) in a possibly random order.
        the atoms of this and the other cluster are matched up by sorting them by coordinate.

        atomic scattering calculators often change the order of atoms in a cluster based on domain,
        and return atom classes versus atomic coordinates.
        this method allows to import the atom classes into the original cluster.

        the method checks that the other cluster contains the same number of atoms.
        it does not check that the clusters contain the same atomic positions.
        linear translations are acceptable.

        @param clu: cluster.Cluster object

        @param fields: subset of field names out of FIELDS_CLUSTER_INTERNAL.
            'i', 'x', 'y', 'z' are ignored.
            the set can be specified in any type that converts into a set of strings.

        @return: None

        @raise AssertError if the clusters do not contain the same number of atoms
        """
        assert self.data.shape == clu.data.shape
        fields = set(fields) - {'i', 'x', 'y', 'z'}
        common_order = ('z', 'y', 'x')
        index_self = np.argsort(self.data, order=common_order)
        index_other = np.argsort(clu.data, order=common_order)
        for field in fields:
            self.data[field][index_self] = clu.data[field][index_other]

    def save_to_file(self, f, fmt=FMT_DEFAULT, comment="", emitters_only=False):
        """
        save the cluster to a file which can be read by the scattering program.

        the method updates the atom index because some file formats require an index column.
        
        @param f: (string/handle) path name or open file handle of the cluster file.
            if the filename ends in .gz, the file is saved in compressed gzip format

        @param fmt: (int) file format.
            must be one of the FMT_ constants.
            if FMT_DEFAULT, self.file_format is used.

        @param comment: (str) comment line (second line) in XYZ file.
            not used in other file formats.
            by default, self.comment is used.

        @param emitters_only: (bool) if True, only atoms marked as emitters are saved.
            by default, all atoms are saved.

        @return None
        """
        if fmt == FMT_DEFAULT:
            fmt = self.file_format

        if not comment:
            comment = self.comment

        self.update_index()
        if emitters_only:
            idx = self.data['e'] != 0
            data = self.data[idx]
        else:
            data = self.data

        if fmt == FMT_MSC:
            file_format = FMT_CLUSTER_MSC
            fields = FIELDS_CLUSTER_MSC
            header = ""
        elif fmt == FMT_EDAC:
            file_format = FMT_CLUSTER_EDAC
            fields = FIELDS_CLUSTER_EDAC
            header = "{nat} l(A)".format(nat=data.shape[0])
        elif fmt == FMT_XYZ:
            file_format = FMT_CLUSTER_XYZ
            fields = FIELDS_CLUSTER_XYZ
            header = "{nat}\n{com}".format(nat=data.shape[0], com=comment)
        elif fmt == FMT_PHAGEN_IN:
            file_format = FMT_CLUSTER_PHAGEN_IN
            fields = FIELDS_CLUSTER_PHAGEN_IN
            header = None
        elif fmt == FMT_PHAGEN_OUT:
            file_format = FMT_CLUSTER_PHAGEN_OUT
            fields = FIELDS_CLUSTER_PHAGEN_OUT
            header = ""
        elif fmt == FMT_PMSCO:
            file_format = FMT_CLUSTER_INTERNAL
            fields = FIELDS_CLUSTER_INTERNAL
            names = NAMES_CLUSTER_INTERNAL
            header = "# " + " ".join([names[field] for field in fields])
        else:
            raise ValueError("unknown file format {}".format(fmt))

        data = data[fields]
        np.savetxt(f, data, fmt=file_format, header=header, comments="")


class ClusterGenerator(object):
    """
    cluster generator class.

    this class bundles the cluster methods in one place
    so that it's easier to exchange them for different kinds of clusters.

    the project must override at least the create_cluster method.
    if emitters should be run in parallel tasks, the count_emitters method must be implemented as well.
    """

    def __init__(self, project):
        """
        initialize the cluster generator.

        @param project: reference to the project object.
            cluster generators may need to look up project parameters.
        """
        self.project = project

    def count_emitters(self, model, index):
        """
        return the number of emitter configurations for a particular model, scan and domain.

        the number of emitter configurations may depend on the model parameters, scan index and domain index.
        by default, the method returns 1, which means that there is only one emitter configuration.

        emitter configurations are mainly a way to distribute the calculations to multiple processes
        since the resulting diffraction patterns add up incoherently.
        for this to work, the create_cluster() method must pay attention to the emitter index
        and generate either a full cluster with all emitters (single process)
        or a cluster with only a subset of the emitters according to the emitter index (multiple processes).
        whether all emitters are calculated in one or multiple processes is decided at run-time
        based on the available resources.

        note that this function returns the number of _configurations_ not _atoms_.
        an emitter configuration (declared in a Cluster) may include more than one atom.
        it is up to the project, what is included in a particular configuration.

        to enable multiple emitter configurations, the derived project class must override this method
        and return a number greater than 1.

        @note in some cases it may be most efficient to call create_cluster and
            return Cluster.get_emitter_count() of the generated cluster.
            this is possible because the method is called with emitter index -1.
            model and index can be passed unchanged to create_cluster.

        @param model (dictionary) model parameters to be used in the calculation.

        @param index (named tuple CalcID) calculation index.
            the method should consider only the following attributes:
            @arg scan   scan index (index into Project.scans)
            @arg domain domain index (index into Project.domains)
            @arg emit   emitter index must be -1.

        @return number of emitter configurations.
            this implementation returns the default value of 1.
        """
        return 1

    def create_cluster(self, model, index):
        """
        create a Cluster object given the model parameters and calculation index.

        the generated cluster will typically depend on the model parameters.
        depending on the project, it may also depend on the scan index, domain index and emitter index.

        the scan index can be used to generate a different cluster for different scan geometry,
        e.g., if some atoms can be excluded due to a longer mean free path.
        if this is not the case for the specific project, the scan index can be ignored.

        the domain index may select a particular domain that has a different atomic arrangement.
        in this case, depending on the value of index.domain, the function must generate a cluster corresponding
        to the particular domain.
        the method can ignore the domain index if the project defines only one domain,
        or if the domain does not correspond to a different atomic structure.

        the emitter index selects a particular emitter configuration.
        depending on the value of the emitter index, the method must react differently:

        1. if the value is -1, return the full cluster and mark all inequivalent emitter atoms.
           emitters which are reproduced by a domain expansion in combine_emitters() should not be marked.
           the full diffraction scan will be calculated in one calculation.

        2. if the value is greater or equal to zero, generate the cluster with the emitter configuration
           selected by the emitter index.
           the index is in the range between 0 and the return value of count_emitters() minus 1.
           the results of the individual emitter calculations are summed up in combine_emitters().

        the code should ideally be written such that either case yields the same diffraction result.
        if count_emitters() always returns 1 (default), the second case does not have to be implemented,
        and the method can ignore the emitter index.

        the method must ignore the region index.

        if the creation of a cluster fails due to invalid model parameters,
        rather than raising an exception or constraining values,
        the method should send an error message to the logger and return an empty cluster or None.

        @param model (dictionary) model parameters to be used in the calculation.

        @param index (named tuple CalcID) calculation index.
            the method should consider only the following attributes:
            @arg scan   scan index (index into Project.scans)
            @arg domain domain index (index into Project.domains)
            @arg emit   emitter index.
                            if -1, generate the full cluster and mark all emitters.
                            if greater or equal to zero, the value is a zero-based index of the emitter configuration.

        @return None.
            sub-classes must return a valid Cluster object or None in case of failure.
        """
        return None


class LegacyClusterGenerator(ClusterGenerator):
    """
    cluster generator class for projects that don't declare a generator.

    in previous versions, the create_cluster and count_emitters methods were implemented by the project class.
    this class redirects generator calls to the project methods
    providing compatibility to older project code.
    """

    def __init__(self, project):
        super(LegacyClusterGenerator, self).__init__(project)

    def count_emitters(self, model, index):
        """
        redirect the call to the corresponding project method if implemented.
        """
        try:
            return self.project.count_emitters(model, index)
        except AttributeError:
            return 1

    def create_cluster(self, model, index):
        """
        redirect the call to the corresponding project method.
        """
        return self.project.create_cluster(model, index)


def parse_cli():
    """
    parse the command line

    @return: Namespace object created by the argument parser.
    """
    import argparse
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        description="""
            cluster conversion
            """)
    format_choices = ["PMSCO", "MSC", "EDAC", "XYZ", "PHAGEN_OUT", "PHAGEN_IN"]
    parser.add_argument('input_format',
                        choices=format_choices,
                        help="format of input file")
    parser.add_argument('input_file',
                        help="path and name of input file")
    parser.add_argument('output_format',
                        choices=format_choices,
                        help="format of output file")
    parser.add_argument('output_file',
                        help="path and name of output file")

    args = parser.parse_args()

    return args


def convert_cli(args):
    """
    convert cluster files from one format into another

    this function is part of the command line interface

    @param args: command line arguments

    @return: None
    """
    clu = Cluster()
    clu.file_format = FMT_PMSCO
    input_format = globals()["FMT_" + args.input_format.upper()]
    output_format = globals()["FMT_" + args.output_format.upper()]
    clu.load_from_file(args.input_file, input_format)
    clu.save_to_file(args.output_file, output_format)


def main_cli():
    """
    command line interface to convert cluster files

    see @ref convert_cli.

    @return: None
    """
    args = parse_cli()
    convert_cli(args)


if __name__ == '__main__':
    main_cli()
    sys.exit(0)