1015 lines
37 KiB
Python
1015 lines
37 KiB
Python
"""
|
|
@package pmsco.data
|
|
Import, export, evaluation of msc data.
|
|
|
|
This module provides common functions for loading/saving and manipulating PED scan data sets.
|
|
|
|
@author Matthias Muntwiler
|
|
|
|
@copyright (c) 2015-23 by Paul Scherrer Institut @n
|
|
Licensed under the Apache License, Version 2.0 (the "License"); @n
|
|
you may not use this file except in compliance with the License.
|
|
You may obtain a copy of the License at
|
|
http://www.apache.org/licenses/LICENSE-2.0
|
|
"""
|
|
|
|
import logging
|
|
import math
|
|
import numpy as np
|
|
import numpy.typing as npt
|
|
import os
|
|
import scipy.special
|
|
import scipy.optimize as so
|
|
from typing import Any, Callable, Dict, Generator, Iterable, List, Mapping, Optional, Sequence, Set, Tuple, Union
|
|
import h5py
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
try:
|
|
import loess
|
|
except (ModuleNotFoundError, ImportError) as e:
|
|
loess = None
|
|
logger.critical("Error importing the loess package.", exc_info=e)
|
|
|
|
## energy, intensity
DTYPE_EI = [('e', 'f4'), ('i', 'f4')]
## energy, theta, phi, intensity
DTYPE_ETPI = [('e', 'f4'), ('t', 'f4'), ('p', 'f4'), ('i', 'f4')]
## energy, theta, phi, intensity, sigma (standard deviation)
DTYPE_ETPIS = [('e', 'f4'), ('t', 'f4'), ('p', 'f4'), ('i', 'f4'), ('s', 'f4')]
## energy, theta, phi, alpha, intensity
DTYPE_ETPAI = [('e', 'f4'), ('t', 'f4'), ('p', 'f4'), ('a', 'f4'), ('i', 'f4')]
## energy, theta, phi, alpha, intensity, sigma (standard deviation)
DTYPE_ETPAIS = [('e', 'f4'), ('t', 'f4'), ('p', 'f4'), ('a', 'f4'), ('i', 'f4'), ('s', 'f4')]
## theta, phi
DTYPE_TP = [('t', 'f4'), ('p', 'f4')]
## theta, phi, intensity
DTYPE_TPI = [('t', 'f4'), ('p', 'f4'), ('i', 'f4')]
## theta, phi, intensity, sigma (standard deviation)
DTYPE_TPIS = [('t', 'f4'), ('p', 'f4'), ('i', 'f4'), ('s', 'f4')]
## intensity, theta, phi
DTYPE_ITP = [('i', 'f4'), ('t', 'f4'), ('p', 'f4')]

## map from data type name (file extension) to numpy dtype specification
DTYPES = {'EI': DTYPE_EI, 'ETPI': DTYPE_ETPI, 'ETPIS': DTYPE_ETPIS, 'ETPAI': DTYPE_ETPAI, 'ETPAIS': DTYPE_ETPAIS,
          'TP': DTYPE_TP, 'TPI': DTYPE_TPI, 'TPIS': DTYPE_TPIS, 'ITP': DTYPE_ITP, }

## names of the supported data types.
# bug fix: this was previously assigned the unbound method `DTYPES.keys`
# (missing call parentheses), which broke membership tests such as
# `'ETPI' in DATATYPES` with a TypeError.
# the dictionary view returned by keys() stays in sync with DTYPES.
DATATYPES = DTYPES.keys()

## supported scan types
# @arg @c 'E' energy
# @arg @c 'EA' energy - alpha (analyser)
# @arg @c 'ET' energy - theta
# @arg @c 'TP' theta - phi (holo scan)
SCANTYPES = ['E', 'EA', 'ET', 'TP']

## argument types accepted by numpy.genfromtxt/loadtxt as data source
GenTextFileLike = Union[str, os.PathLike, Iterable[str], int]
## argument types accepted by open() as data source
OSFileLike = Union[str, os.PathLike, int]
|
|
|
|
|
def create_etpi(shape: Tuple[int], sigma_column: bool = True) -> np.ndarray:
    """
    create an ETPI array of a given size.

    an ETPI array is a numpy structured array.
    all fields of the new array are initialized with zeroes.

    @param shape (tuple) shape of the array
    @param sigma_column: whether the array should include a sigma field (ETPIS type instead of ETPI)
    """
    chosen_dtype = DTYPE_ETPIS if sigma_column else DTYPE_ETPI
    return np.zeros(shape, dtype=chosen_dtype)
|
|
|
|
|
|
def create_data(shape: Tuple[int], datatype: str = '', dtype: Optional[npt.DTypeLike] = None) -> np.ndarray:
    """
    create a data array of a given size and type.

    a data array is a numpy structured array, initialized with zeroes.
    either datatype or dtype must be specified; dtype takes precedence.

    @param shape (tuple) shape of the array, only scalars (1-tuples) supported currently
    @param datatype see DATATYPES
    @param dtype see DTYPES
    """
    effective_dtype = dtype if dtype else DTYPES[datatype]
    return np.zeros(shape, dtype=effective_dtype)
|
|
|
|
|
|
def holo_grid(theta_start: float = 90., theta_step: float = 1., theta_range: float = 90.,
              phi_start: float = 0., phi_range: float = 360., phi_refinement: float = 1.):
    """
    Generator of a holo grid with constant point density in solid angle.

    The generator yields the polar coordinates of a hologram scan in the traditional Osterwalder fashion:
    the polar step is constant, while the azimuthal step grows towards the pole
    so that the points are distributed evenly on the hemisphere.

    Theta is the polar, phi the azimuthal coordinate.

    @param theta_start Maximum polar angle in degrees, 0..90. Defaults to 90 (grazing emission).
    @param theta_step Polar angle step in degrees, 1..90. Defaults to 1.
    @param theta_range Polar angle range in degrees, 1..th_start. Defaults to 90.
    @param phi_start Azimuthal start angle in degrees. Defaults to 0.
        This azimuth is included at every polar step.
    @param phi_range Azimuthal range in degrees. Defaults to 360.
    @param phi_refinement Azimuthal refinement/oversampling (scalar). Defaults to 1.
        A refinement of 2 yields a factor 2 more grid points in the azimuthal sub-scans.

    @return yield tuples (theta, phi) in degrees
    """
    deg2rad = 0.01745329

    def azimuthal_step(polar):
        # near the pole, or when the refined azimuthal density rounds down
        # to zero points, collapse the azimuthal scan to a single point
        # (a step of 360 makes np.arange yield only phi_start).
        if polar < 0.5 or int(phi_range * math.sin(polar * deg2rad) * phi_refinement / theta_step) == 0:
            step = 0.0
        else:
            step = phi_range / int(polar / theta_start * phi_range / theta_step)
        return 360. if abs(step) < 0.001 else step

    for polar in np.arange(theta_range, -theta_step, -theta_step):
        step = azimuthal_step(polar)
        for azimuth in np.arange(phi_start, phi_range, step):
            yield polar, azimuth
|
|
|
|
|
|
def holo_array(generator: Callable[..., Iterable[Tuple[float, float]]],
               generator_args: Dict,
               datatype: str = 'TP',
               dtype: Optional[npt.DTypeLike] = None) -> np.ndarray:
    """
    Create an hologram scan grid in a numpy array.

    A holo data array is a numpy structured array containing at least
    a 't' (polar angle) and a 'p' (azimuthal angle) column.
    These columns are filled from the given generator;
    any further columns (energy, intensity, ...) are initialized with zeroes.

    @param generator Generator that yields tuples (theta, phi) for each grid point,
        given the keyword arguments generator_args.
        holo_grid implements the traditional Osterwalder holo scan.
    @param generator_args Keyword arguments to be passed to the generator.
    @param datatype See DATATYPES. Must contain 'T' and 'P' dimensions. Defaults to 'TP'.
    @param dtype See DTYPES. Must contain a 't' and 'p' column. Takes precedence over datatype.
        Defaults to None (not specified).
    """
    if not dtype:
        dtype = DTYPES[datatype]

    angles = np.fromiter(generator(**generator_args), dtype=DTYPES['TP'])

    result = np.zeros(angles.shape, dtype=dtype)
    for column in ('t', 'p'):
        result[column] = angles[column]

    return result
|
|
|
|
|
|
def analyse_holoscan_steps(holoscan: np.ndarray) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
    """
    Find the polar and azimuthal steps in a holoscan.

    NOTE(review): the index arithmetic assumes the scan is sorted by theta,
    then phi (as produced by holo_grid and sort_data) -- confirm with callers.

    @param holoscan: TP-like structured numpy.ndarray with 't' and 'p' columns.

    @return: thetas: unique theta angles. sorted.
        dtheta: theta steps for each theta
        dphi: phi step for each theta
    """

    # indices points at the first row of each theta group,
    # counts holds the number of phi points per theta
    thetas, indices, counts = np.unique(holoscan['t'], return_index=True, return_counts=True)
    # repeat the last step so dtheta has one entry per theta
    dtheta = np.diff(thetas)
    dtheta = np.append(dtheta, dtheta[-1])

    # duplicate the last phi so that indices+1 stays within bounds
    # even when the last theta group starts on the last row
    adjusted_phis = np.append(holoscan['p'], holoscan['p'][-1])
    phis0 = adjusted_phis[indices]
    phis1 = adjusted_phis[indices+1]
    # phi step = difference between the first two phis of each theta group
    dphi = phis1 - phis0
    # full azimuthal range, estimated from the last theta group
    phi_range = counts[-1] * dphi[-1]
    # theta groups with a single phi point span the full azimuthal range
    dphi[counts <= 1] = phi_range

    return thetas, dtheta, dphi
|
|
|
|
|
|
def load_plt(filename: GenTextFileLike, int_column: int = -1) -> np.ndarray:
    """
    loads ETPI data from an MSC output (plt) file

    plt file format:
    5-9 columns, space or tab delimited.
    column 0: energy.
    column 1: momentum.
    column 2: theta.
    column 3: phi.
    columns 4-8: intensities.
    comment lines must start with # character.

    @param filename: path or name of the file to be read

    @param int_column: index of the column to be read as intensity.
        typical values: 4, 5, 6, 7, 8
        or negative: -1 (last), -2, (second last), ...
        default: -1

    @return a structured one-dimensional numpy.ndarray where
        data[i]['e'] = energy,
        data[i]['t'] = theta,
        data[i]['p'] = phi,
        data[i]['i'] = selected intensity column.
    """
    columns = (0, 2, 3, int_column)
    raw = np.genfromtxt(filename, usecols=columns, dtype=DTYPE_ETPI)
    data = np.atleast_1d(raw)
    sort_data(data)
    return data
|
|
|
|
|
|
def load_edac_pd(filename: OSFileLike, int_column: int = -1,
                 energy: float = 0.0, theta: float = 0.0, phi: float = 0.0, fixed_cluster: bool = False) -> np.ndarray:
    """
    load ETPI or ETPAI data from an EDAC PD output file.

    EDAC file format:
    @arg row 0: "--- scan PD"
    @arg row 1: column names
    @arg rows 2 and following: space delimited data

    @arg first columns (up to 3): energy, theta, phi depending on scan
    @arg last columns (arbitrary number): intensity at the recursion order specified in the header

    @param filename: path or name of the file to be read

    @param int_column: index of the column to be read as intensity.
        typical values: -1 (last), -2, (second last), ...
        default: -1

    @param energy: default value if energy column is missing
    @param theta: default value if theta column is missing
    @param phi: default value if phi column is missing

    @param fixed_cluster:
        if True, (theta, phi) are mapped to (alpha, phi). theta is copied from function argument.
        if False, angles are copied literally.

    @return a structured one-dimensional numpy.ndarray (ETPI or ETPAI)

    @verbatim
    data[i]['e'] = energy
    data[i]['t'] = theta
    data[i]['p'] = phi
    data[i]['i'] = selected intensity column
    @endverbatim
    """

    # read only the two header lines here; the data rows are parsed
    # by genfromtxt below, which re-opens the file with skip_header=2
    with open(filename, "rt", encoding="latin1") as f:
        header1 = f.readline().strip()
        header2 = f.readline().strip()
    if not header1 == '--- scan PD':
        logger.warning("unexpected EDAC output file header format")

    # build the dtype and the column selection from the header names.
    # the coordinate columns (eV, theta, phi) come first in fixed order;
    # "order" marks the start of the intensity columns, of which only
    # int_column is read.
    col_names = header2.split()
    dtype = []
    cols = []
    ncols = 0
    for name in col_names:
        if name == "eV":
            dtype.append(('e', 'f4'))
            cols.append(ncols)
            ncols += 1
        elif name == "theta":
            dtype.append(('t', 'f4'))
            cols.append(ncols)
            ncols += 1
        elif name == "phi":
            dtype.append(('p', 'f4'))
            cols.append(ncols)
            ncols += 1
        elif name == "order":
            # intensity columns reached: select int_column (negative index
            # counts from the end of the row) and stop parsing the header
            dtype.append(('i', 'f4'))
            cols.append(int_column)
            ncols += 1
            break
        else:
            logger.warning("unexpected EDAC output file column name")
            break
    cols = tuple(cols)
    raw = np.atleast_1d(np.genfromtxt(filename, usecols=cols, dtype=dtype, skip_header=2))

    # assemble the result array; missing coordinate columns are filled
    # with the default values from the function arguments
    if fixed_cluster:
        etpi = np.empty(raw.shape, dtype=DTYPE_ETPAI)
    else:
        etpi = np.empty(raw.shape, dtype=DTYPE_ETPI)

    if 'eV' in col_names:
        etpi['e'] = raw['e']
    else:
        etpi['e'] = energy
    if 'theta' in col_names:
        etpi['t'] = raw['t']
    else:
        etpi['t'] = theta
    if 'phi' in col_names:
        etpi['p'] = raw['p']
    else:
        etpi['p'] = phi
    etpi['i'] = raw['i']

    if fixed_cluster:
        # fixed-cluster mode: the scanned angle is the analyser angle alpha,
        # the sample polar angle is fixed at the function argument
        etpi['a'] = etpi['t']
        etpi['t'] = theta

    sort_data(etpi)
    return etpi
|
|
|
|
|
|
def load_etpi(filename: GenTextFileLike) -> np.ndarray:
    """
    loads ETPI or ETPIS data from a text file

    etpi file format:
    4 or 5 columns, space or tab delimited.
    column 0: energy.
    column 1: theta.
    column 2: phi.
    column 3: intensity.
    column 4: sigma error (standard deviation). optional defaults to 0.
    comment lines must start with # character.
    comment lines may appear anywhere, and are ignored.

    @param filename: path or name of the file to be read.
        load_etpi handles compressed files (ending .gz) transparently.

        NOTE(review): on a 4-column file the first loadtxt attempt fails and
        the file is read a second time. a one-shot iterable (generator or
        open stream) would be exhausted by then -- pass a path in that case.

    @return a structured one-dimensional numpy.ndarray where
        data[i]['e'] = energy,
        data[i]['t'] = theta,
        data[i]['p'] = phi,
        data[i]['i'] = intensity,
        data[i]['s'] = sigma (if present in the file).

    @deprecated new code should use load_data().
    """
    try:
        # optimistic attempt: 5 columns including sigma
        data = np.loadtxt(filename, dtype=DTYPE_ETPIS)
    except IndexError:
        # fall back to 4 columns without sigma
        data = np.loadtxt(filename, dtype=DTYPE_ETPI)
    sort_data(data)
    return data
|
|
|
|
|
|
def load_data(filename: GenTextFileLike, dtype: Optional[npt.DTypeLike] = None):
    """
    load column data (ETPI, and the like) from a text file.

    the file extension must spell one of DATATYPES (case insensitive)
    corresponding to the meaning of the columns in the file.

    @param filename

    @param dtype: override data type recognition if the extension cannot be used.
        must be one of the data.DTYPE constants
        DTYPE_EI, DTYPE_ETPI, DTYPE_ETPIS, DTYPE_ETPAI, or DTYPE_ETPAIS.
        by default, the function uses the extension to determine the data type.
        the actual type can be read from the dtype attribute of the returned array.
        if the extension is missing or unknown, DTYPE_EI is assumed.

    @return one-dimensional numpy structured ndarray with data

    @raise IOError if the file cannot be read.

    @raise IndexError if the number of columns is lower than expected based on the dtype or extension.
    """
    if not dtype:
        _, extension = os.path.splitext(filename)
        dtype = DTYPES.get(extension[1:].upper(), DTYPE_EI)

    data = np.loadtxt(filename, dtype=dtype)
    sort_data(data)
    return data
|
|
|
|
|
|
def format_extension(data: np.ndarray) -> str:
    """
    format the file extension based on the contents of an array.

    the extension is the concatenation of the field names of the array.

    @param data ETPI-like structured numpy.ndarray.

    @return: file extension string including the leading period.
    """
    joined_names = "".join(data.dtype.names)
    return ".{0}".format(joined_names)
|
|
|
|
|
|
def save_data(filename: OSFileLike, data: npt.ArrayLike) -> None:
|
|
"""
|
|
save column data (ETPI, and the like) to a text file.
|
|
|
|
the extension must specify one of DATATYPES (case insensitive)
|
|
corresponding to the meaning of the columns in the file.
|
|
|
|
@param filename
|
|
|
|
@param data ETPI-like structured numpy.ndarray.
|
|
|
|
@remark this function is plain numpy.savetxt, provided for convenience.
|
|
"""
|
|
np.savetxt(filename, data, fmt='%g')
|
|
|
|
|
|
def sort_data(data: np.ndarray) -> None:
    """
    sort scan data (ETPI and the like) in a consistent order.

    the array is sorted in place along the scan dimensions
    energy, theta, phi and alpha (in this precedence).
    this function should be used for all sorting of measured and calculated data
    to ensure a consistent sort order.

    the sort key is built from the scan fields present in the array;
    the intensity and sigma fields are never part of the key.
    the stable _mergesort_ algorithm preserves the relative order
    of indistinct elements.

    @warning sorting on intensity and sigma fields would mix up the scan dimensions
        and produce invalid results!

    @param data ETPI-like structured numpy.ndarray.

    @return: None. the data array is sorted in place.
    """
    scan_fields = {'e', 't', 'p', 'a'}
    key = [field for field in data.dtype.names if field in scan_fields]
    data.sort(kind='mergesort', order=key)
|
|
|
|
|
|
def restructure_data(data: np.ndarray, dtype: Optional[npt.DTypeLike] = None,
                     defaults: Optional[Mapping] = None) -> np.ndarray:
    """
    restructure the type of a data array by adding or removing columns.

    example: to combine an ETPI and an ETPAI scan, both arrays must have the same data type.
    this function adds the necessary columns and initializes them with default values.
    to find out the appropriate data type, use the common_dtype() function.
    to concatenate arrays, call numpy.hstack on a tuple of arrays.

    @param data: original data array (a structured numpy array having one of the DTYPES data types).

    @param dtype: data type of the new array. must be one out of DTYPES.
        default is DTYPE_ETPAIS which includes any possible field.

    @param defaults: default values for new fields.
        this must be a dictionary where the key is the field name and value the default value of the field.
        the dictionary can contain an arbitrary sub-set of fields.
        undefined fields are initialized to zero.
        if the parameter is unspecified, all fields are initialized to zero.

    @return: re-structured numpy array or
        @c data if the new and original data types are the same.
    """
    if dtype is None:
        dtype = DTYPE_ETPAIS
    if data.dtype == dtype:
        return data

    result = np.zeros(data.shape, dtype=dtype)

    # apply defaults first so that shared columns copied below take precedence
    if defaults is not None:
        for name, value in defaults.items():
            if name in result.dtype.names:
                result[name] = value

    shared = [spec[0] for spec in dtype if spec[0] in data.dtype.names]
    for name in shared:
        result[name] = data[name]

    return result
|
|
|
|
|
|
def common_dtype(scans: Iterable[Union[npt.ArrayLike, npt.DTypeLike]]) -> npt.DTypeLike:
    """
    determine the common data type for a number of scans.

    example: to combine an ETPI and an ETPAI scan, both arrays must have the same data type.
    this function determines the least common data type.
    to restructure each array, use the restructure_data() function.
    to concatenate arrays, call numpy.hstack on a tuple of arrays.

    @param scans: iterable of scan data or types.
        the elements of the list must be ETPI-like numpy structured arrays,
        numpy.dtype specifiers of a permitted ETPI-like array,
        or one of the DTYPE constants listed in DTYPES.

    @return: DTYPE constant which includes all the fields referred to in the input data.
    """
    fields = set()
    for item in scans:
        if isinstance(item, np.ndarray):
            names = item.dtype.names
        elif isinstance(item, np.dtype):
            names = item.names
        else:
            # a DTYPE constant: list of (name, format) tuples
            names = [spec[0] for spec in item]
        fields.update(names)

    # filter the superset type so the field order stays canonical
    return [spec for spec in DTYPE_ETPAIS if spec[0] in fields]
|
|
|
|
|
|
def detect_scan_mode(data: np.ndarray) -> Tuple[List[str], Dict[str, np.ndarray]]:
    """
    detect the scan mode and unique scan positions in a data array.

    the function detects which columns of the data array are scanning.
    if the values of a column are not constant, the column is considered to be scanning.
    the function does not require a particular ordering of the scan positions
    (although other parts of the code may do so).
    the function returns the names of the scanning columns.

    the function also extracts unique positions for each column, and returns one array per column of input data.
    in the case of a fixed (non-scanning) column, the resulting array contains one data point.
    if the input data does not contain a particular column, it is omitted from the results.

    if both theta and phi columns are non-constant, the function reports a theta-phi scan.
    in a theta-phi scan, each pair (theta, phi) is considered a scan position,
    and uniqueness is enforced with respect to the (theta, phi) pairs.
    the individual theta and phi arrays may contain duplicate values.

    @param data ETPI-like structured numpy.ndarray.
        only the 'e', 't', 'p', and 'a' columns are considered.

    @return the tuple (scan_mode, scan_positions), where
        @arg scan_mode is a list of column names that refer to the scanned variables,
            i.e. non-constant columns in the input data.
            possible values are 'e', 't', 'p', and 'a'.
        @arg scan_positions is a dictionary of scan dimensions.
            the dictionary contains one-dimensional numpy arrays, one for each dimension.
            the dictionary keys are 'e', 't', 'p', and 'a'.
            if a dimension is not scanned, the corresponding array contains just one element.
            if the input data does not contain a column at all,
            the corresponding output array is not included in the dictionary.

        note the special case of theta-phi scans.
        theta and phi are always returned as two separate arrays.
    """
    def unique_or_empty(column):
        # bug fix: missing fields raise KeyError on modern numpy
        # (older releases raised ValueError), so both must be caught,
        # otherwise data without e.g. an energy column crashes here.
        try:
            return np.unique(data[column])
        except (KeyError, ValueError):
            return np.array([])

    scan_energy = unique_or_empty('e')
    scan_theta = unique_or_empty('t')
    scan_phi = unique_or_empty('p')
    scan_alpha = unique_or_empty('a')

    # theta-phi scan: enforce uniqueness on (theta, phi) pairs
    if scan_theta.shape[0] >= 2 and scan_phi.shape[0] >= 2:
        try:
            scan_theta_phi = np.unique(data[['t', 'p']])
        except (KeyError, ValueError):
            scan_theta_phi = None
        if scan_theta_phi is not None and len(scan_theta_phi.dtype.names) == 2:
            scan_theta = scan_theta_phi['t']
            scan_phi = scan_theta_phi['p']

    scan_mode = []
    scan_positions = {}
    # a column is present if it has at least one unique value,
    # and scanning if it has at least two
    for key, positions in (('e', scan_energy), ('t', scan_theta),
                           ('p', scan_phi), ('a', scan_alpha)):
        if positions.shape[0] >= 1:
            scan_positions[key] = positions
        if positions.shape[0] >= 2:
            scan_mode.append(key)

    return scan_mode, scan_positions
|
|
|
|
|
|
def filter_tp(data: np.ndarray, _filter: np.ndarray) -> np.ndarray:
    """
    select data points from an ETPI array that match theta and phi coordinates of another ETPI array.

    the angles are compared at a resolution of 0.1 degree:
    both coordinates are rounded to the nearest multiple of 0.1 before matching.

    @param data ETPI-like structured numpy.ndarray (ETPI, ETPIS, ETPAI, ETPAIS).

    @param _filter ETPI-like structured numpy.ndarray (ETPI, ETPIS, ETPAI, ETPAIS).
        only 't' and 'p' columns are used.

    @return filtered data (numpy.ndarray)
        copy of selected data rows from input data.
        same data type as input data.
    """
    # copy theta,phi into separate structured arrays
    # so that the row-wise set intersection sees only these two columns
    data_tp = np.zeros_like(data, dtype=[('t', '<i4'), ('p', '<i4')])
    filt_tp = np.zeros_like(_filter, dtype=[('t', '<i4'), ('p', '<i4')])
    # multiply by 10, round to integer (0.1 degree resolution)
    data_tp['t'] = np.around(data['t'] * 10.0)
    data_tp['p'] = np.around(data['p'] * 10.0)
    filt_tp['t'] = np.around(_filter['t'] * 10.0)
    filt_tp['p'] = np.around(_filter['p'] * 10.0)
    # calculate intersection
    # NOTE(review): np.in1d is deprecated since numpy 2.0 in favour of
    # np.isin -- consider migrating when the numpy baseline allows.
    idx = np.in1d(data_tp, filt_tp)
    result = data[idx]
    return result
|
|
|
|
|
|
def interpolate_hemi_scan(rect_tpi: np.ndarray, hemi_tpi: np.ndarray) -> np.ndarray:
    """
    interpolate a hemispherical scan from a rectangular angle scan.

    the function interpolates in phi (azimuth) only.
    the rectangular array must contain a matching scan line for each theta (polar angle) of the hemi scan.
    this requires that the hemi scan have a linear theta axis.

    @param rect_tpi TPI structured numpy.ndarray.
        rectangular theta-phi scan.
        each azimuthal line has the same number of points and range.
        the azimuthal coordinate is monotonically increasing.
    @param hemi_tpi TPI structured numpy.ndarray.
        hemispherical theta-phi scan.
        each theta of the hemi scan must have a matching scan line in the rectangular scan.
        the array may contain additional columns (E, A, S) as long as each (theta,phi) pair is unique.
        the extra columns are not altered.
    @return hemi_tpi with the interpolation result in the I column.
    """
    for theta in np.unique(hemi_tpi['t']):
        # rows of both scans belonging to this polar angle (0.1 deg tolerance)
        hemi_rows = np.abs(hemi_tpi['t'] - theta) < 0.1
        rect_rows = np.abs(rect_tpi['t'] - theta) < 0.1

        hemi_tpi['i'][hemi_rows] = np.interp(hemi_tpi['p'][hemi_rows],
                                             rect_tpi['p'][rect_rows],
                                             rect_tpi['i'][rect_rows])
    return hemi_tpi
|
|
|
|
|
|
def reshape_2d(flat_data: np.ndarray, axis_columns: Sequence[str], return_column: str = 'i') -> \
        Tuple[np.ndarray, np.ndarray, np.ndarray]:
    """
    reshape an ETPI-like array into a two-dimensional array according to the scan axes.

    @param flat_data structured, one-dimensional numpy.ndarray with column labels.
        the array must contain a rectangular scan grid.
        the array must be sorted in the order of axis_columns.

    @param axis_columns list of column names that designate the axes

    @param return_column: name of field to return in two dimensions

    @return the tuple (result_data, axis0, axis1), where
        @arg result_data (ndarray) new two-dimensional ndarray of the scan
        @arg axis0 (ndarray) scan positions along the first dimension
        @arg axis1 (ndarray) scan positions along the second dimension
    """
    axis0 = np.unique(flat_data[axis_columns[0]])
    axis1 = np.unique(flat_data[axis_columns[1]])
    grid_shape = (len(axis0), len(axis1))
    grid = np.reshape(flat_data[return_column], grid_shape, order='C')
    # return an independent copy so the caller cannot alias flat_data
    return grid.copy(), axis0, axis1
|
|
|
|
|
|
def calc_modfunc_mean(data: np.ndarray) -> np.ndarray:
    """
    calculates the modulation function using the mean value of data.

    this is a simplified calculation method
    which can be used if the I0 of the data does not have a strong variation.

    the modulation function is (I - <I>) / <I>, where <I> is the mean intensity.
    one- and two-dimensional scans are supported.

    @param data: ETPI array containing experimental or calculated intensity.

    @return ETPI array containing the modulation function.
        for unsupported scan modes (more than two scan dimensions),
        an error is logged and an unmodified copy of the input is returned.
    """

    scan_mode, scan_positions = detect_scan_mode(data)
    # single copy up front preserves all non-intensity columns
    # (the original code redundantly copied the array a second time
    # in the one-dimensional branch)
    modf = data.copy()

    if len(scan_mode) == 1:
        norm = np.mean(data['i'], dtype=np.float64)
        modf['i'] = (data['i'] - norm) / norm
    elif len(scan_mode) == 2:
        n0 = len(scan_positions[scan_mode[0]])
        n1 = len(scan_positions[scan_mode[1]])
        nd_data = np.reshape(data['i'], (n0, n1), order='C')

        # grand mean computed via the row means (equivalent on a full grid)
        prof0 = np.mean(nd_data, axis=1, dtype=np.float64)
        norm0 = np.mean(prof0, dtype=np.float64)
        nd_modf = (nd_data - norm0) / norm0

        modf['i'] = np.ravel(nd_modf, order='C')
    else:
        logger.error('unsupported scan in calc_modfunc_mean: {0}'.format(scan_mode))

    return modf
|
|
|
|
|
|
def calc_modfunc_loess(data: np.ndarray, smth: float = 0.4) -> np.ndarray:
    """
    calculate the modulation function using LOESS (locally weighted regression) smoothing.

    the modulation function of I(x) is (I(x) - S(x)) / S(x)
    where the array S(x) is a LOESS-smoothed copy of I(x).

    this function uses true multi-dimensional LOESS smoothing,
    in the same way as Igor's Loess operation.

    this function uses the LOESS algorithm implemented by
    William S. Cleveland, Eric Grosse, Ming-Jen Shyu, dated 18 August 1992.
    the code and the python interface are included in the loess package.

    @param data structured numpy.ndarray in EI, ETPI, or ETPAI format.
        can contain a one- or multi-dimensional scan.
        the algorithm does not require any specific scan mode or order
        (no rectangular grid, no particular scan hierarchy, no sorting).

        if data contains a hemispherical scan, the phi dimension is ignored,
        i.e. the function effectively applies a phi-average.

        the modulation function is calculated for the finite-valued scan points.
        NaNs are ignored and do not affect the finite values.

    @param smth: size of the smoothing window relative to the size of the scan.
        reasonable values are between 0.2 and 0.5.
        the default value 0.4 has been found to work in many cases.

    @return copy of the data array with the modulation function in the 'i' column.
    """
    # restrict the regression to finite intensities; NaN rows keep their value
    sel = np.isfinite(data['i'])
    _data = data[sel]

    modf = data.copy()
    if _data.shape[0]:
        scan_mode, __ = detect_scan_mode(_data)
        if 't' in scan_mode and 'p' in scan_mode:
            # hemispherical scan: smooth along theta only (phi-average)
            scan_mode.remove('p')

        # loess_struct(n, p): n observations in p scan dimensions
        lo = loess.loess_struct(_data.shape[0], len(scan_mode))
        factors = [_data[axis] for axis in scan_mode]
        # NOTE(review): set_x appears to take the factors as one flat array --
        # confirm the expected memory layout against the loess package docs.
        lo.set_x(np.hstack(tuple(factors)))
        lo.set_y(_data['i'])
        lo.model.span = smth
        loess.loess(lo)

        # residual / fit = (I - S) / S, written back to the finite rows only
        modf['i'][sel] = lo.get_fitted_residuals() / lo.get_fitted_values()
    else:
        # no finite input: the modulation function is undefined everywhere
        modf['i'] = np.nan

    return modf
|
|
|
|
|
|
def square_diff_rfactor(experiment: np.ndarray, theory: np.ndarray) -> float:
    """
    Calculate the R-factor from the normalized sum of squared differences.

    If the sigma column is present in experiment and non-zero,
    the R-factor terms are weighted by 1/sigma**2.

    The input arrays must have the same shape and the coordinate columns must be identical.
    The array elements are compared element-by-element.
    The values of the coordinate arrays do not influence the result.
    Terms having NaN intensity are ignored.

    This function can be specified in the Scan.rfactor_func parameter of the project.

    @param experiment: (numpy structured array)
        ETPI, ETPIS, ETPAI or ETPAIS array containing the experimental modulation function.
        If an `s` field is present and non-zero,
        the R-factor terms are weighted by 1/sigma**2.

    @param theory: (numpy structured array)
        ETPI or ETPAI array containing the theoretical function.

    @return scalar R-factor in the range from 0.0 to 2.0.

    @raise ValueError if the function fails (e.g. division by zero or all elements non-finite).
    """
    # drop terms where either intensity is non-finite
    finite = np.logical_and(np.isfinite(theory['i']), np.isfinite(experiment['i']))
    theory = theory[finite]
    experiment = experiment[finite]

    # weighting only applies when all sigmas are strictly positive
    if ('s' in experiment.dtype.names) and (experiment['s'].min()) > 0.0:
        weights = 1.0 / experiment['s'] ** 2
    else:
        weights = 1.0

    numerator = (weights * (experiment['i'] - theory['i']) ** 2).sum(dtype=np.float64)
    denominator = (weights * (experiment['i'] ** 2 + theory['i'] ** 2)).sum(dtype=np.float64)
    return numerator / denominator
|
|
|
|
|
|
def scaled_rfactor_func(scale: float, experiment: np.ndarray, weights: np.ndarray, theory: np.ndarray) -> float:
    """
    calculate the R-factor of a modulation function against the measurement with scaled amplitude.

    this function allows to apply a scaling factor to the experimental function and returns the R-factor.
    this is useful if the amplitudes of the two functions do not match due to systematic effects
    of the calculation or the measurement.

    this function is used by optimize_rfactor() as a scipy.optimize.least_squares optimization function,
    which requires a specific signature.

    NaNs will propagate to the final result.
    math exceptions are not handled.

    @param scale: scaling factor (> 0).
        the experimental modulation function is multiplied by this parameter.
        < 1 (> 1) decreases (increases) the experimental amplitude.
        the R factor is calculated using the scaled modulation function.

    @param experiment: numpy.ndarray containing the experimental modulation function

    @param weights: numpy.ndarray containing the experimental weights

    @param theory: numpy.ndarray containing the theoretical modulation function

    @return: scalar R-factor in the range from 0.0 to 2.0.
        nan if any element of the function arguments is nan.

    @raise ValueError if all experiments and theory values or all weights are zero.
    """
    squared_diffs = weights * (scale * experiment - theory) ** 2
    squared_sums = weights * (scale ** 2 * experiment ** 2 + theory ** 2)
    numerator = squared_diffs.sum(dtype=np.float64)
    denominator = squared_sums.sum(dtype=np.float64)
    return numerator / denominator
|
|
|
|
|
|
def optimize_rfactor(experiment: np.ndarray, theory: np.ndarray) -> float:
    """
    calculate the R-factor of a calculated modulation function against the measurement, adjusting their amplitude.

    if the sigma column is present in experiment and non-zero,
    the R-factor terms are weighted by 1/sigma**2.

    this function varies the scale of the experimental function and returns the minimum R-factor.
    this is useful if the amplitudes of the two functions do not match due to systematic effects
    of the calculation or the measurement.

    the optimization is done in a scipy.optimize.least_squares optimization of the scaled_rfactor_func() function.
    the initial guess of the scaling factor is 0.7, the constraining boundaries are 1/10 and 10.

    the input arrays must have the same shape and the coordinate columns must be identical (they are ignored).
    the array elements are compared element-by-element.
    terms having NaN intensity are ignored.

    This function can be specified in the Scan.rfactor_func parameter of the project.

    @param experiment: ETPI, ETPIS, ETPAI or ETPAIS array containing the experimental modulation function.

    @param theory: ETPI or ETPAI array containing the calculated modulation functions.

    @return scalar R-factor in the range from 0.0 to 2.0.

    @raise ValueError if the optimization fails (e.g. division by zero or all elements non-finite).
    """
    # discard terms where either intensity is non-finite
    keep = np.logical_and(np.isfinite(theory['i']), np.isfinite(experiment['i']))
    exp_sel = experiment[keep]
    th_sel = theory[keep]

    # weight by 1/sigma**2 only if a strictly positive sigma column is available
    if ('s' in exp_sel.dtype.names) and (exp_sel['s'].min() > 0.0):
        weights = 1.0 / exp_sel['s'] ** 2
    else:
        weights = np.ones_like(exp_sel['i'])

    fit = so.least_squares(scaled_rfactor_func, 0.7, bounds=(0.1, 10.0),
                           args=(exp_sel['i'], weights, th_sel['i']))

    # evaluate the R-factor at the optimum scale found by the fit
    return scaled_rfactor_func(fit.x, exp_sel['i'], weights, th_sel['i'])
|
|
|
|
|
|
def alpha_average(data: np.ndarray) -> np.ndarray:
    """
    average I(alpha, theta, phi) over alpha.

    @param data structured numpy.ndarray in ETPAI or ETPAIS format with a non-singular alpha dimension.
        the input array is not modified.

    @return resulting ETPI or ETPIS data array.
        if the scan mode is not supported, a copy of the input array is returned unchanged
        and an error is logged.
    """
    scan_mode, scan_positions = detect_scan_mode(data)
    result = data.copy()

    if len(scan_mode) == 2 and scan_mode[1] == 'a':
        axis0 = scan_positions[scan_mode[0]]
        n0 = len(axis0)
        axis1 = scan_positions[scan_mode[1]]
        n1 = len(axis1)
        # reshape so that axis 1 runs over alpha for each fixed outer coordinate.
        # np.reshape returns a view of the input whenever possible.
        nd_data = np.reshape(data, (n0, n1), order='C')

        # BUGFIX: take an explicit copy of the first alpha slice.
        # nd_data[:, 0] is a view into the (reshaped) input array, so assigning
        # the averages into it would silently overwrite the caller's data.
        nd_result = nd_data[:, 0].copy()
        names = list(nd_data.dtype.names)
        names.remove('a')
        for name in names:
            # each field's mean over alpha is computed before that field is overwritten
            nd_result[name] = np.mean(nd_data[name], axis=1, dtype=np.float64)
        # drop the alpha column from the result
        result = nd_result[names]
    else:
        logger.error('unsupported scan in alpha_average: {0}'.format(scan_mode))

    return result
|
|
|
|
|
|
def phi_average(data: np.ndarray) -> np.ndarray:
    """
    average I(theta, phi) over phi.

    @param data TPI-like structured numpy.ndarray containing a hemispherical scan.

    @return resulting TI or TIS data array.
        if the scan mode is not supported, a copy of the input array is returned unchanged
        and an error is logged.
    """
    scan_mode, scan_positions = detect_scan_mode(data)
    result = data.copy()

    if scan_mode == ['t', 'p']:
        theta_values = np.unique(scan_positions['t'])

        # the output keeps all fields of the input except phi
        out_names = [name for name in data.dtype.names if name != 'p']
        out_dtype = [(name, data.dtype[name].str) for name in out_names]
        result = create_data((len(theta_values),), dtype=out_dtype)

        for idx, theta in enumerate(theta_values):
            # select all points of the current theta ring (0.01 degree tolerance)
            ring = np.abs(scan_positions['t'] - theta) < 0.01
            for name in out_names:
                result[name][idx] = np.mean(data[name][ring], dtype=np.float64)
    else:
        logger.error('unsupported scan in phi_average: {0}'.format(scan_mode))

    return result
|
|
|
|
|
|
def alpha_mirror_average(data: np.ndarray) -> np.ndarray:
    """
    calculate the average of I(alpha, theta, phi) and I(-alpha, theta, phi).

    @param data structured numpy.ndarray in ETPAI or ETPAIS format.
        for each (alpha, theta, phi) the array must contain a corresponding (-alpha, theta, phi)
        within a tolerance of 0.5 degrees in alpha. otherwise, a warning is issued.

    @return resulting data array, same shape as input.
        the array is sorted.
    """
    result1 = data.copy()
    sort_data(result1)

    result2 = data.copy()
    try:
        result2['a'] = -result2['a']
        sort_data(result2)
    except (KeyError, ValueError):
        # BUGFIX: modern numpy raises KeyError for a missing structured-array field,
        # older versions raised ValueError - catch both so data without an alpha
        # column is handled gracefully rather than crashing.
        pass

    if np.allclose(result1['a'], result2['a'], atol=0.5):
        result1['i'] = (result1['i'] + result2['i']) / 2.0
        try:
            # propagate the standard deviation of the averaged intensities
            result1['s'] = np.sqrt(result1['s'] ** 2 + result2['s'] ** 2) / 2.0
        except (KeyError, ValueError):
            # no sigma column present - nothing to propagate
            pass
    else:
        logger.warning('asymmetric alpha scan. skipping alpha mirror average.')

    return result1
|
|
|
|
|
|
# select the default modulation function at import time.
# the LOESS-based estimator is used if the optional loess package imported successfully
# (see the guarded import at the top of this module).
if loess is not None:
    default_modfunc = calc_modfunc_loess
    logger.info("pmsco.data.default_modfunc = pmsco.data.calc_modfunc_loess")
else:
    # fall back to calc_modfunc_mean - presumably a simpler mean-based estimator
    # defined earlier in this module (TODO confirm its smoothing characteristics).
    default_modfunc = calc_modfunc_mean
    logger.warning("pmsco.data.default_modfunc = pmsco.data.calc_modfunc_mean")

# default R-factor function used when a project does not specify one.
default_rfactor = square_diff_rfactor
logger.info("pmsco.data.default_rfactor = pmsco.data.square_diff_rfactor")