igor-public/pearl/pearl-pshell-import.ipf

#pragma TextEncoding = "UTF-8"
#pragma rtGlobals=3		// Use modern global access method and strict wave access.
#pragma IgorVersion = 8.00
#pragma ModuleName = PearlPShellImport
#pragma version = 2.1
#if IgorVersion() < 9.00
#include <HDF5 Browser>
#endif
#include "pearl-compat"
#include "pearl-gui-tools"
#include "pearl-area-import"

// copyright (c) 2013-22 Paul Scherrer Institut
//
// Licensed under the Apache License, Version 2.0 (the "License");
//   you may not use this file except in compliance with the License.
//   You may obtain a copy of the License at
//   http:///www.apache.org/licenses/LICENSE-2.0

/// @file
/// @brief import data from PShell
/// @ingroup ArpesPackage
///
/// HDF5 file import from the PShell data acquisition program.
///
/// the module provides two main entry functions:
///
/// - psh5_load() for almost all data loading tasks including data reduction.
/// - psh5_preview() to load a simple 1d or 2d preview of the first and most relevant dataset in the file.
///
/// @version up to igor 8, this module requires the HDF5 XOP which must be enabled manually.
/// as of igor 9 and later, HDF5 is built in.
///
/// @version in version 2.0, the interface has changed significantly.
///
/// @author matthias muntwiler, matthias.muntwiler@psi.ch
///
/// @copyright 2013-22 Paul Scherrer Institut @n
/// Licensed under the Apache License, Version 2.0 (the "License"); @n
///   you may not use this file except in compliance with the License. @n
///   You may obtain a copy of the License at
///   http://www.apache.org/licenses/LICENSE-2.0

/// @namespace PearlPShellImport
/// @brief import data from PShell
///
/// PearlPShellImport is declared in @ref pearl-pshell-import.ipf.

/// Dimension label for the energy dispersive dimension of multi-dimensional datasets
strconstant kEnergyDimLabel = "energy"

/// Dimension label for the angle dispersive dimension of multi-dimensional datasets
strconstant kAngleDimLabel = "angle"

/// Dimension label for the scan dimension of multi-dimensional datasets
strconstant kScanDimLabel = "scan"

/// Dimension label for the data dimension.
/// This label may be used to store the parameters for the `setscale d` operation.
strconstant kDataDimLabel = "data"

/// List of preferred datasets to load for preview
strconstant kPreviewDatasets = "ImageEnergyDistribution;ScientaSpectrum;ScientaImage;Counts;SampleCurrent;"

/// List of datasets that must be loaded to determine the axis scaling of a Scienta image
strconstant kScientaScalingDatasets = "LensMode;ScientaChannelBegin;ScientaChannelEnd;ScientaSliceBegin;ScientaSliceEnd;Eph;"

/// List of diagnostic datasets that are normally loaded with a scan
strconstant kEssentialDiagnostics = "ManipulatorX;ManipulatorY;ManipulatorZ;ManipulatorTheta;ManipulatorTilt;ManipulatorPhi;MonoEnergy;"

/// List of datasets that must be transposed upon loading
strconstant kTransposedDatasets = "ScientaImage;"

constant kDSCPositioners = 0x0001
constant kDSCDetectors = 0x0002
constant kDSCScientaScaling = 0x0004
constant kDSCPreview = 0x0008
constant kDSCEssentialDiags = 0x0010
constant kDSCAttrs = 0x0020
constant kDSCDiags = 0x0040
constant kDSCSnaps = 0x0080
constant kDSCMeta = 0x0100
constant kDSCMonitors = 0x0200
constant kDSCRegions = 0x0400
constant kDSCOther = 0x8000
constant kDSCAll = 0xffff


// ====== main import functions ======

/// main data loading function
///
/// load the requested elements from the given file.
///
/// scans, regions and datasets are additive.
/// wildcards can be used to select multiple or all datasets.
///
/// classes are subtractive: only datasets of specified classes are loaded.
/// by default, only positioners, detectors, scaling and essential diagnostics are loaded.
///
/// essential diags, scaling, positioners related to requested detectors are always loaded
///
/// data reduction (if specified) applies to 3d data, see psh5_load_dataset_reduced() for details.
///
/// @param	path_name		igor symbolic path name. can be empty if the path is specified in file_name or a dialog box should be displayed
///
/// @param	file_name		if empty a dialog box shows up
///
/// @param	dest_df		destination folder reference.
///								if dest_df is specified, data is loaded into this folder.
///								else, a new folder derived from the file name is created under root:
///
/// @param	scans			semicolon-separated list of scan paths to load.
///								scan groups are at the top level, their name consists of "scan", an optional space and a number.
///								all datasets in the group and sub-groups are considered for loading unless excluded by other arguments.
///								if empty, no datasets are loaded based on their relation to a scan.
///								names are matched by Igor's StringMatch function.
///								the matching is insensitive to case and spaces.
///								to load all scans, pass "/scan*".
///								the leading slash before "scan" can be omitted.
///
/// @param	regions		semicolon-separated list of region paths to load.
///								region groups are children of scan groups, their name consists of "region", an optional space and a number.
///								all datasets in the group and sub-groups are considered for loading unless excluded by other arguments.
///								if empty, no datasets are loaded based on their relation to a region.
///								names are matched by Igor's StringMatch function.
///								the matching is insensitive to case and spaces.
///								to load all regions of scan 1, pass "/scan1/region*".
///								to load regions 1 of all scans, pass "/scan*/region1".
///								the leading slash before "scan" can be omitted.
///
/// @param	datasets		semicolon-separated list of dataset paths to load.
///								this allows to load individual datasets.
///								names are matched by Igor's StringMatch function against full dataset paths.
///								to load all datasets named "SampleCurrent", pass "*/SampleCurrent".
///								the matching is insensitive to case and spaces.
///								additional datasets may be loaded for scaling.
///
/// @param	classes		filter datasets (that were selected by the scans, regions and datasets arguments) by class.
///								this allows, for example, to exclude the diagnostics.
///								note that scaling datasets are always loaded.
///								the value is a bit-wise switch, typically the arithmetic disjunction of kDSCXxxx constants.
///								by default, only positioners, detectors, scaling and essential diagnostics are loaded.
///								to completely load all datasets, specify kDSCAll.
///
/// @param	max_rank		load only datasets with lower or equal rank.
///
/// @return					data folder reference of the file-level data folder. same as dest_df if specified.
///
/// @return					global string s_filepath in new data folder contains the full file path on disk.
///
/// @return					global string s_scanpaths in new data folder contains a list of scan groups inside the file.
///
/// @return					global string s_loaded_datasets in new data folder contains a list of loaded datasets.
///								the items are full group paths of the HDF5 file.
///								dataset paths can be mapped to loaded data folders using the psh5_dataset_to_folder function.
///
function /df psh5_load(path_name, file_name, scans, regions, datasets, [classes, max_rank, reduction_func, reduction_params, dest_df])
	string path_name
	string file_name
	string scans
	string regions
	string datasets
	variable classes
	variable max_rank
	string reduction_func
	string reduction_params
	dfref dest_df

	dfref save_df = GetDataFolderDFR()
	variable timerRefNum = startMSTimer

	if (ParamIsDefault(classes) || (classes == 0))
		classes = kDSCPositioners | kDSCDetectors | kDSCScientaScaling | kDSCEssentialDiags
	endif
	variable essential_classes = kDSCPositioners | kDSCScientaScaling | kDSCEssentialDiags

	if (ParamIsDefault(dest_df) || !DataFolderRefStatus(dest_df))
		dest_df = psh5_open_file(path_name, file_name)
	else
		dest_df = psh5_open_file(path_name, file_name, dest_df=dest_df)
	endif
	if (ParamIsDefault(reduction_func))
		reduction_func = ""
	endif
	if (ParamIsDefault(reduction_params))
		reduction_params = ""
	endif

	if (DataFolderRefStatus(dest_df))
		setdatafolder dest_df
		psh5_load_general_group(dest_df)

		// datasets contained in file
		svar /sdfr=dest_df file_datasets = s_datasets

		// datasets contained in file up to allowed rank
		string ranked_datasets = ""
		if (ParamIsDefault(max_rank))
			ranked_datasets = file_datasets
		else
			svar /sdfr=dest_df file_ranks = s_datasets_ranks
			ranked_datasets = psh5_filter_datasets_rank(file_datasets, file_ranks, 0, max_rank)
		endif

		string matching_datasets = ""
		string matching_essentials = ""
		string scan_datasets = ""
		string region_datasets = ""
		string free_datasets = ""
		string selected_datasets = ""
		string essential_datasets = ""

		variable i_item
		variable n_items
		string item

		// select datasets belonging to selected scans
		n_items = ItemsInList(scans, ";")
		for (i_item = 0; i_item < n_items; i_item += 1)
			item = StringFromList(i_item, scans, ";")
			if (cmpstr(item[0,3], "scan") == 0)
				item = "/" + item
			endif
			item = ReplaceString("//", item + "/*", "/")
			matching_datasets = psh5_match_datasets(ranked_datasets, item)
			scan_datasets = scan_datasets + matching_datasets
		endfor

		// select datasets belonging to selected regions
		n_items = ItemsInList(regions, ";")
		for (i_item = 0; i_item < n_items; i_item += 1)
			item = StringFromList(i_item, regions, ";")
			if (cmpstr(item[0,3], "scan") == 0)
				item = "/" + item
			endif
			item = ReplaceString("//", item + "/*", "/")
			matching_datasets = psh5_match_datasets(ranked_datasets, item)
			region_datasets = region_datasets + matching_datasets
		endfor

		// free select datasets
		n_items = ItemsInList(datasets, ";")
		for (i_item = 0; i_item < n_items; i_item += 1)
			item = StringFromList(i_item, datasets, ";")
			if (cmpstr(item[0,3], "scan") == 0)
				item = "/" + item
			endif
			matching_datasets = psh5_match_datasets(ranked_datasets, item)
			free_datasets = free_datasets + matching_datasets
		endfor

		selected_datasets = scan_datasets + region_datasets + free_datasets

		string filtered_datasets = ""
		string diag_datasets = ""
		string selected_scans = psh5_extract_scan_paths(selected_datasets)
		variable i_scan
		variable n_scans = ItemsInList(selected_scans)
		string scan
		string selected_regions = psh5_extract_region_paths(selected_datasets)
		variable i_region
		variable n_regions = ItemsInList(selected_regions)
		string region
		string positioners
		string detectors

		// datasets directly under one of the selected regions
		region_datasets = ""
		for (i_region = 0; i_region < n_regions; i_region += 1)
			region = StringFromList(i_region, selected_regions, ";")
			region_datasets = region_datasets + GrepList(file_datasets, "(?i)^" + region + "[[:alpha:]]+$")
		endfor

		// filter selected datasets by class and add essential dependencies
		// each scan may have specific positioners and detectors
		for (i_scan = 0; i_scan < n_scans; i_scan += 1)
			scan = StringFromList(i_scan, selected_scans, ";")

			// potentially interesting diagnostics of current scan and selected regions
			diag_datasets = psh5_match_datasets(file_datasets, scan + "*")
			diag_datasets = psh5_match_dataset_classes(diag_datasets, kDSCAttrs | kDSCDiags | kDSCSnaps)
			diag_datasets = diag_datasets + GrepList(file_datasets, "(?i)^" + scan + "[[:alpha:]]+$")
			diag_datasets = diag_datasets + psh5_match_datasets(region_datasets, scan + "*")

			// explicit positioners and detectors set by pshell
			setdatafolder dest_df
			dfref scan_df = psh5_create_folders(scan)
			setdatafolder scan_df
			psh5_load_scan_meta(dest_df, scan)
			wave /t /z ScanWritables
			wave /t /z ScanReadables
			if (WaveExists(ScanWritables))
				positioners = twave2list(ScanWritables, ";")
			else
				positioners = ""
			endif
			if (WaveExists(ScanReadables))
				detectors = twave2list(ScanReadables, ";")
			else
				detectors = ""
			endif

			// filtering by classes
			matching_datasets = psh5_match_dataset_classes(selected_datasets, classes, positioners=positioners, detectors=detectors)

			// add essential diags
			if (strlen(matching_datasets) > 1)
				essential_datasets = psh5_match_dataset_classes(diag_datasets, essential_classes, positioners=positioners, detectors=detectors)
			endif

			// scaling datasets before detectors because data reduction needs the scales
			filtered_datasets = essential_datasets + filtered_datasets + matching_datasets
		endfor

		// load the datasets
		setdatafolder dest_df
		string /g s_loaded_datasets = ""
		s_loaded_datasets = psh5_load_datasets(dest_df, filtered_datasets, reduction_func=reduction_func, reduction_params=reduction_params)

		// apply scaling by scan
		for (i_scan = 0; i_scan < n_scans; i_scan += 1)
			scan = StringFromList(i_scan, selected_scans, ";")
			ps_scale_datasets(psh5_dataset_to_folder(dest_df, scan))
		endfor

		psh5_close_file(dest_df)

		// performance reporting
		if (timerRefNum >= 0)
			setdatafolder dest_df
			variable /g psh5_perf_secs
			psh5_perf_secs = stopMSTimer(timerRefNum) / 1e6
		endif
	endif

	setdatafolder save_df
	return dest_df
end

/// load preview
///
/// load information about the file structure and a preview dataset
///
function /df psh5_preview(path_name, file_name, [dest_df, preview_datasets])
	string path_name
	string file_name
	dfref dest_df
	string preview_datasets

	dfref save_df = GetDataFolderDFR()

	if (ParamIsDefault(dest_df))
		dest_df = psh5_open_file(path_name, file_name)
	else
		dest_df = psh5_open_file(path_name, file_name, dest_df=dest_df)
	endif
	if (ParamIsDefault(preview_datasets))
		preview_datasets = kPreviewDatasets
	endif
	variable essential_classes = kDSCPositioners | kDSCScientaScaling

	if (DataFolderRefStatus(dest_df))
		setdatafolder dest_df
		psh5_load_general_group(dest_df)

		// select dataset based on preference
		svar /sdfr=dest_df file_datasets = s_datasets
		svar /sdfr=dest_df file_datasets_ranks = s_datasets_ranks
		string selected_datasets = ""
		string essential_datasets = ""
		string scan_datasets = ""
		string filtered_datasets = ""

		variable nds = ItemsInList(preview_datasets, ";")
		variable ids
		string ds
		for (ids = 0; ids < nds; ids += 1)
			ds = StringFromList(ids, preview_datasets, ";")
			selected_datasets = psh5_filter_datasets_rank(file_datasets, file_datasets_ranks, 1, 2)
			selected_datasets = psh5_match_datasets(selected_datasets, "/scan*" + ds)
			if (strlen(selected_datasets) > 1)
				selected_datasets = StringFromList(0, selected_datasets, ";")
				break
			endif
		endfor

		// add essential dependencies
		if (strlen(selected_datasets) > 1)
			string selected_scans = psh5_extract_scan_paths(selected_datasets)
			string scan
			string positioners

			scan = StringFromList(0, selected_scans, ";")
			scan_datasets = psh5_match_datasets(file_datasets, scan + "*")

			psh5_load_scan_meta(dest_df, scan)
			wave /t /z ScanWritables
			if (WaveExists(ScanWritables))
				positioners = twave2list(ScanWritables, ";")
			else
				positioners = ""
			endif

			essential_datasets = psh5_match_dataset_classes(scan_datasets, essential_classes, positioners=positioners)
			filtered_datasets = essential_datasets + selected_datasets

			// load the datasets
			psh5_load_datasets(dest_df, filtered_datasets, create_folders=0)
			ps_scale_datasets(dest_df)
			string /g s_preview_dataset = StringFromList(0, selected_datasets, ";")
			string /g s_preview_wave = StringFromList(ItemsInList(s_preview_dataset, "/") - 1, s_preview_dataset, "/")
		endif

		psh5_close_file(dest_df)
	endif

	setdatafolder save_df
	return dest_df
end

/// open a HDF5 file created by the PShell data acquisition program and prepare the data folder.
///
/// the function opens a specified or interactively selected HDF5 file,
/// and loads general information about the file
/// including a list of contained datasets.
///
/// data can be loaded into an existing or new data folder under root.
///
/// the file must be closed by psh5_close_file() after use.
/// the HDF5 file ID is stored in the global variable file_id until the file is closed.
///
/// @param	path_name		igor symbolic path name. can be empty if the path is specified in FileName or a dialog box should be displayed
///
/// @param	file_name		if empty a dialog box shows up
///
/// @param	dest_df		destination folder reference.
///								if dest_df is specified, data is loaded into this folder.
///								else, by default, a new folder derived from the file name is created in root:
///
/// @return					the return value of the function is a data folder reference of the created data folder.
///
/// @return					global variable file_id contains ID number of open HDF5 file from HDF5OpenFile.
///								zero if an error occurred.
///
/// @return					global string s_filepath in new data folder contains the full file path on disk.
///
/// @return					global string s_scanpaths in new data folder contains a list of scan groups inside the file.
///
function /df psh5_open_file(path_name, file_name, [dest_df])
	string path_name
	string file_name
	dfref dest_df

	dfref save_df = GetDataFolderDFR()

	variable fid
	HDF5OpenFile /P=$path_name /R fid as file_name
	if (v_flag == 0)
		if (!ParamIsDefault(dest_df))
			setdatafolder dest_df
		else
			string dest_name = ad_suggest_foldername(s_filename, sourcename="psh")
			setdatafolder root:
			newdatafolder /s /o $("root:" + dest_name)
		endif
		dfref file_df = GetDataFolderDFR()

		variable /g file_id = fid
		string /g s_filepath
		string /g s_scanpaths
		string /g s_datasets
		string datatypes
		string ranks
		string dimensions
		s_filepath = s_path + s_filename
		s_scanpaths = psh5_list_scans(file_id)
		s_datasets = psh5_list_all_datasets(file_id)
		[datatypes, ranks, dimensions] = psh5_list_dataset_info(file_id, s_datasets)
		string /g s_datasets_datatypes = datatypes
		string /g s_datasets_ranks = ranks
		string /g s_datasets_dimensions = dimensions
	else
		dfref file_df = $""
	endif

	setdatafolder save_df
	return file_df
end

/// close a HDF5 file opened by psh5_open_file.
///
/// this function just closes the HDF5 file.
/// no change is made to the loaded data.
///
/// the function requires the specified data folder to contain a variable named `file_id`
/// that specifies the HDF5 file ID.
/// the variable may also be in a parent folder.
/// the variable is killed after the file has been closed.
/// if the folder or variable can't be found, the function does nothing.
///
/// @param	file_df		data folder reference of open HDF5 file from psh5_open_file().
///							the reference may also point to a child folder,
///							the function will look for a file_id variable in all parent folders.
///
/// @note on the command line, data folder references can be specified using the $-notation like `$"foldername"`.
///       the current folder is written as `$":"`.
///
function psh5_close_file(file_df)
	dfref file_df

	if (DataFolderRefStatus(file_df))
		nvar /sdfr=file_df /z file_id
		if (nvar_Exists(file_id))
			HDF5CloseFile /z file_id
			file_id = 0
			KillVariables /z file_id
		else
			dfref parent_df = $(GetDataFolder(1, file_df) + ":")
			if (DataFolderRefStatus(parent_df))
				psh5_close_file(parent_df)
			endif
		endif
	endif
end

// === datasets and paths ===

/// convert text wave to list.
///
///
static function /s twave2list(wt, sep)
	wave /t wt
	string sep

	string list = ""
	variable n = numpnts(wt)
	variable i
	for (i = 0; i < n; i += 1)
		list = AddListItem(wt[i], list, sep, inf)
	endfor

	return list
end

/// convert numeric wave to list.
///
///
static function /s wave2list(w, format, sep)
	wave w
	string format
	string sep

	string list = ""
	variable n = numpnts(w)
	variable i
	string s
	for (i = 0; i < n; i += 1)
		sprintf s, format, w[i]
		list = AddListItem(s, list, sep, inf)
	endfor

	return list
end

/// list scan groups of a PShell data file.
///
/// the function returns a list of all top-level groups whose name starts with "scan".
///
/// @param	file_id			ID of open HDF5 file from psh5_open_file().
///
/// @return					semicolon-separated list of group paths.
///
function /s psh5_list_scans(file_id)
	variable file_id

	HDF5ListGroup /F /TYPE=1 file_id, "/"

	variable ig
	variable ng = ItemsInList(S_HDF5ListGroup, ";")
	string sg
	string scans = ""

	for (ig = 0; ig < ng; ig += 1)
		sg = StringFromList(ig, S_HDF5ListGroup, ";")
		if (cmpstr(sg[1,4], "scan") == 0)
			scans = AddListItem(sg, scans, ";", inf)
		endif
	endfor

	return scans
end

/// list all datasets in a file
///
/// the function returns a list of all datasets in a file.
/// each dataset is listed by its full path like, e.g., "/scan 1/region 1/dataset 1".
///
/// this function wraps a one-line HDF5 operation and is provided just to be more memorable.
///
/// @param	file_id			ID of open HDF5 file from psh5_open_file().
///
/// @return					semicolon-separated list of absolute dataset paths.
///
function /s psh5_list_all_datasets(file_id)
	variable file_id

	HDF5ListGroup /F /R /TYPE=2 /Z file_id, "/"
	if (!v_flag)
		return S_HDF5ListGroup
	else
		return ""
	endif
end

/// list data types and dimensions of datasets
///
/// this function has multiple returns.
///
/// @param	file_id			ID of open HDF5 file from psh5_open_file().
///
/// @return					semicolon-separated list of (simplified) datatypes.
///								datatypes are marked as "i" (integer), "f" (float), "s" (string) or "?" (unknown).
///
/// @return					semicolon-separated list of ranks (number of dimensions).
///
/// @return					semicolon-separated list of dimensions.
///								each item is a comma-separated list of dimension sizes.
///								items do not contain trailing commas.
///
function [string datatypes, string ranks, string dimensions] psh5_list_dataset_info(variable file_id, string datasets)
	variable nds = ItemsInList(datasets, ";")
	variable ids
	string sds
	STRUCT HDF5DataInfo di
	InitHDF5DataInfo(di)
	variable err
	variable idim
	string sdims
	datatypes = ""
	ranks = ""
	dimensions = ""

	for (ids = 0; ids < nds; ids += 1)
		sds = StringFromList(ids, datasets, ";")
		err = HDF5DatasetInfo(file_id, sds, 0, di)
		if (err == 0)
			switch (di.datatype_class)
				case H5T_INTEGER:
					datatypes = AddListItem("i", datatypes, ";", ids)
					break
				case H5T_FLOAT:
					datatypes = AddListItem("f", datatypes, ";", ids)
					break
				case H5T_STRING:
					datatypes = AddListItem("s", datatypes, ";", ids)
					break
				default:
					datatypes = AddListItem("?", datatypes, ";", ids)
					break
			endswitch

			ranks = AddListItem(num2str(di.ndims), ranks, ";", ids)

			sdims = ""
			for (idim = 0; idim < di.ndims; idim += 1)
				sdims = AddListItem(num2str(di.dims[idim]), sdims, ",", idim)
			endfor
			if (strlen(sdims) > 1)
				sdims = sdims[0, strlen(sdims)-2]
			endif
			dimensions = AddListItem(sdims, dimensions, ";", ids)
		endif
	endfor
end

/// filter a list of datasets by string matching
///
/// this function can be used to extract certain dataset paths
/// from a list of all datasets in a file.
/// the matching is insensitive to spaces and case.
///
/// examples match strings:
/// - `"*/scan1/region1/*"`	match all datasets in scan 1, region 1
/// - `"!*/diags/*"` 			remove diagnostics from list
///
/// @param datasets		semicolon separated list of dataset paths
/// @param match			match string for igor's StringMatch function
///
/// @return				list of matching datasets
///
function /s psh5_match_datasets(datasets, match)
	string datasets
	string match

	string result = ""

	string spaceless_datasets = ReplaceString(" ", datasets, "")
	string spaceless_match = ReplaceString(" ", match, "")

	string sep = ";"
	variable seplen = strlen(sep)
	variable nds = ItemsInList(spaceless_datasets, sep)
	variable ids
	string ds
	variable offset = 0

	for (ids = 0; ids < nds; ids += 1)
		ds = StringFromList(0, spaceless_datasets, sep, offset)
		offset += strlen(ds) + seplen
		if (StringMatch(ds, spaceless_match))
			ds = StringFromList(ids, datasets, sep, 0)
			result = AddListItem(ds, result, sep, inf)
		endif
	endfor

	return result
end

/// filter datasets by rank
///
/// @param datasets		semicolon-separated list of datasets to be checked.
///
/// @param ranks			semicolon-separated list of ranks of each dataset.
///
/// @return				filtered dataset list.
///
function /s psh5_filter_datasets_rank(datasets, ranks, min_rank, max_rank)
	string datasets
	string ranks
	variable min_rank
	variable max_rank

	string result = ""
	string sep = ";"
	variable seplen = strlen(sep)
	variable nds = ItemsInList(datasets, sep)
	variable ids
	string ds
	variable offset = 0
	variable rank

	for (ids = 0; ids < nds; ids += 1)
		ds = StringFromList(0, datasets, sep, offset)
		offset += strlen(ds) + seplen
		rank = str2num(StringFromList(ids, ranks, sep))
		if ((rank >= min_rank) && (rank <= max_rank))
			result = AddListItem(ds, result, sep, inf)
		endif
	endfor

	return result
end

/// remove duplicate items from list
///
/// @param list		semicolon-separated list of strings.
///						strings can contain any printable character except the semicolon.
///
/// @return			list of strings with duplicates (second and further instances) removed.
///						all remaining items retain the position of their first occurrence in the original list.
///						the function uses Igor's FindDuplicates operation.
///
static function /s unique_strings(list)
	string list

	string result = ""

	string sep = ";"
	variable seplen = strlen(sep)
	variable nn = ItemsInList(list, sep)
	variable ii
	string item
	variable offset = 0

	make /n=(nn) /t /free wt_in
	for (ii = 0; ii < nn; ii += 1)
		item = StringFromList(0, list, sep, offset)
		offset += strlen(item) + seplen
		wt_in[ii] = item
	endfor

	FindDuplicates /Z /FREE /RT=wt_out wt_in

	return twave2list(wt_out, ";")
end

/// trim dataset paths to the scan part
///
/// return dataset paths stripped to the form /scan*/.
///
/// the function matches each path for a scan token in the first path element
/// and strips off the remaining path.
/// if there are no scan-based datasets, the function returns an empty string.
///
/// the function operates on a single path or a semicolon-separated list of paths.
/// the items of the returned list are unique.
///
/// @param datasets		semicolon separated list of dataset paths
///
/// @return				list of scan paths (no duplicates)
///
function /s psh5_extract_scan_paths(datasets)
	string datasets

	string result = ""
	string sep = ";"
	variable seplen = strlen(sep)
	variable nds = ItemsInList(datasets, sep)
	variable ids
	string ds
	string scan
	string item
	variable offset = 0

	for (ids = 0; ids < nds; ids += 1)
		ds = StringFromList(0, datasets, sep, offset)
		offset += strlen(ds) + seplen
		if (cmpstr(ds[0], "/") != 0)
			ds = "/" + ds
		endif

		scan = StringFromList(1, ds, "/")
		if (StringMatch(scan, "scan*"))
			item = "/" + scan + "/"
		else
			item = ""
		endif

		if ((strlen(item) > 0) && (WhichListItem(item, result, ";", 0, 0) < 0))
			result = AddListItem(item, result, ";", inf)
		endif
	endfor

	return result
end

/// trim dataset paths to the scan/region part
///
/// return dataset paths stripped to the form /scan*/region*/.
///
/// the function matches each path for scan and region tokens in the first two path elements
/// and strips off the remainder.
/// if there are no region-based datasets, the function returns an empty string.
///
/// the function operates on a single path or a semicolon-separated list of paths.
/// the items of the returned list are unique.
///
/// @param datasets		semicolon separated list of dataset paths
///
/// @return				list of scan/region paths (no duplicates)
///
function /s psh5_extract_region_paths(datasets)
	string datasets

	string result = ""
	string sep = ";"
	variable seplen = strlen(sep)
	variable nds = ItemsInList(datasets, sep)
	variable ids
	string ds
	string scan
	string region
	string item
	variable offset = 0

	for (ids = 0; ids < nds; ids += 1)
		ds = StringFromList(0, datasets, sep, offset)
		offset += strlen(ds) + seplen
		if (cmpstr(ds[0], "/") != 0)
			ds = "/" + ds
		endif

		scan = StringFromList(1, ds, "/")
		region = StringFromList(2, ds, "/")
		if (StringMatch(scan, "scan*") && StringMatch(region, "region*"))
			item = "/" + scan + "/" + region + "/"
		else
			item = ""
		endif

		if ((strlen(item) > 0) && (WhichListItem(item, result, ";", 0, 0) < 0))
			result = AddListItem(item, result, ";", inf)
		endif
	endfor

	return result
end

/// filter a list of datasets by classification
///
/// @param datasets		semicolon separated list of dataset paths
///
/// @param	classes	dataset classes.
///							arithmetic OR of the kDSCXxxx constants.
///
/// @return				list of scan/region paths (no duplicates)
///
function /s psh5_match_dataset_classes(datasets, classes, [positioners, detectors])
	string datasets
	variable classes
	string positioners
	string detectors

	if (ParamIsDefault(positioners))
		positioners = ""
	endif
	if (ParamIsDefault(detectors) || (strlen(detectors) == 0))
		detectors = kPreviewDatasets
	endif

	string result = ""
	string sep = ";"
	variable seplen = strlen(sep)
	variable nds = ItemsInList(datasets, sep)
	variable ids
	variable offset = 0
	string ds
	variable nparts
	string ds_parent
	string ds_name
	string ds_scan_rel
	variable ds_class

	for (ids = 0; ids < nds; ids += 1)
		ds = StringFromList(0, datasets, sep, offset)
		offset += strlen(ds) + seplen
		nparts = ItemsInList(ds, "/")
		ds_parent = StringFromList(nparts - 2, ds, "/")
		ds_name = StringFromList(nparts - 1, ds, "/")
		if (cmpstr(ds[0,4], "/scan") == 0)
			ds_scan_rel = RemoveListItem(0, ds[1, strlen(ds) - 1], "/")
		else
			ds_scan_rel = ds
		endif
		ds_class = 0

		if (strlen(ds_parent) > 0)
			ds_class = ds_class | (cmpstr(ds_parent, "attr") == 0 ? kDSCAttrs : 0)
			ds_class = ds_class | (cmpstr(ds_parent, "attrs") == 0 ? kDSCAttrs : 0)
			ds_class = ds_class | (cmpstr(ds_parent, "diags") == 0 ? kDSCDiags : 0)
			ds_class = ds_class | (cmpstr(ds_parent, "snaps") == 0 ? kDSCSnaps : 0)
			ds_class = ds_class | (cmpstr(ds_parent, "meta") == 0 ? kDSCMeta : 0)
			ds_class = ds_class | (cmpstr(ds_parent, "monitors") == 0 ? kDSCMonitors : 0)
			ds_class = ds_class | (cmpstr(ds_parent[0,5], "region") == 0 ? kDSCRegions : 0)
		endif

		if (strlen(ds_name) > 0)
			ds_class = ds_class | (WhichListItem(ds_scan_rel, positioners, sep, 0, 0) >= 0 ? kDSCPositioners : 0)
			ds_class = ds_class | (WhichListItem(ds_scan_rel, detectors, sep, 0, 0) >= 0 ? kDSCDetectors : 0)
			ds_class = ds_class | (WhichListItem(ds_name, kPreviewDatasets, sep, 0, 0) >= 0 ? kDSCPreview : 0)
			ds_class = ds_class | (WhichListItem(ds_name, kScientaScalingDatasets, sep, 0, 0) >= 0 ? kDSCScientaScaling : 0)
			ds_class = ds_class | (WhichListItem(ds_name, kEssentialDiagnostics, sep, 0, 0) >= 0 ? kDSCEssentialDiags : 0)
		endif

		if (ds_class == 0)
			ds_class = kDSCOther
		endif

		if (ds_class & classes)
			result = AddListItem(ds, result, sep, inf)
		endif
	endfor

	return result
end

/// create all data folders along a dataset path
///
///	 if the path ends with a slash, the path is interpreted as a group path,
///	 and each part is mapped to a data folder.
///	 else, the last part of the path is the name of a dataset
///	 and will not produce a folder.
///
/// the path will always be interpreted as starting from the root,
/// regardless whether it starts with a slash or not.
///
/// spaces are removed from folder names, and the names are cleaned up to produce simple names.
///
/// a string variable named "s_hdf5_group" is added to each created folder
/// and contains the incremental path.
///
/// the first child folder is created in the current data folder.
/// at the end, the lowest child folder is selected and returned as the function result.
///
/// @param	datasetpath		hdf5 group path to dataset, e.g. "/scan 1/region 1/ScientaImage".
///
/// @return	data folder reference of the lowest child folder.
///
function /df psh5_create_folders(datasetpath)
	string datasetpath

	if (cmpstr(datasetpath[0], "/") == 0)
		datasetpath = datasetpath[1, strlen(datasetpath)-1]
	endif
	if (cmpstr(datasetpath[strlen(datasetpath)-1], "/") == 0)
		datasetpath += "dummy"
	endif

	variable nfolders = ItemsInList(datasetpath, "/") - 1
	variable ifolder
	string folder
	string inc_path = "/"

	for (ifolder = 0; ifolder < nfolders; ifolder += 1)
		folder = StringFromList(ifolder, datasetpath, "/")
		folder = ps_fix_folder_name(folder)
		NewDataFolder /o/s $folder
		inc_path += folder
		inc_path += "/"
		string /g s_hdf5_group = inc_path
	endfor

	return GetDataFolderDFR()
end

/// map dataset path to datafolder path
///
///	 if the path ends with a slash, the path is interpreted as a group path,
///	 and each part maps to a data folder.
///	 if the last part of the path is the name of a dataset, it is discarded.
///
/// spaces are removed from folder names, and the names are cleaned up to produce simple names.
///
/// the path is interpreted as relative to the specified parent data folder.
/// regardless whether it starts with a slash or not.
///
/// @param	datasetpath		hdf5 group path to dataset, e.g. "/scan 1/region 1/ScientaImage".
///
/// @param	parent_df			parent data folder
///
/// @return	data folder reference
///
function /df psh5_dataset_to_folder(parent_df, datasetpath)
	dfref parent_df
	string datasetpath

	if (cmpstr(datasetpath[0], "/") == 0)
		datasetpath = datasetpath[1, strlen(datasetpath)-1]
	endif
	if (cmpstr(datasetpath[strlen(datasetpath)-1], "/") == 0)
		datasetpath += "dummy"
	endif

	variable nfolders = ItemsInList(datasetpath, "/") - 1
	variable ifolder
	string folder
	string inc_path = ""

	for (ifolder = 0; ifolder < nfolders; ifolder += 1)
		folder = StringFromList(ifolder, datasetpath, "/")
		folder = ps_fix_folder_name(folder)
		if (ifolder)
			inc_path += ":"
		endif
		inc_path += folder
	endfor

	dfref out_df = parent_df:$inc_path
	return out_df
end

/// convert HDF5 group name to data folder name and fix compatibility issues
///
function /s ps_fix_folder_name(group_name)
	string group_name
	string folder_name

	folder_name = ReplaceString(" ", group_name, "")
	if (cmpstr(folder_name, "attrs") == 0)
		folder_name = "attr"
	endif
	folder_name = PearlCleanupName(folder_name)

	return folder_name
end

// ====== import functions ======

/// load multiple datasets from open file
///
/// @param	file_df			data folder reference of open HDF5 file from psh5_open_file().
///								if undefined, the current datafolder is assumed.
///
/// @param	create_folders	if 1 (default), data folders according to the group path are created.
///									if 0, the dataset is loaded into the current folder.
///									the latter option should be used with care because datasets with same names may be overwritten.
///
/// @param reduction_func		data reduction function.
///									three-dimensional datasets can be reduced in dimensionality by on-the-fly data reduction.
///									by default (or if empty string), no reduction is applied.
/// 								see @ref psh5_load_dataset_reduced().
///
/// @param reduction_params	parameter string for the reduction function.
///
/// @return (string) semicolon-separated list of loaded datasets
///
function /s psh5_load_datasets(file_df, datasets, [create_folders, reduction_func, reduction_params])
	dfref file_df
	string datasets
	variable create_folders
	string reduction_func
	string reduction_params

	if (!DataFolderRefStatus(file_df))
		dfref file_df = GetDataFolderDFR()
	endif
	if (ParamIsDefault(create_folders))
		create_folders = 1
	endif
	if (ParamIsDefault(reduction_func))
		reduction_func = ""
	endif
	if (ParamIsDefault(reduction_params))
		reduction_params = ""
	endif

	dfref save_df = GetdataFolderDFR()

	datasets = unique_strings(datasets)
	variable nds = ItemsInList(datasets, ";")
	variable ids
	string ds
	string loaded_datasets = ""
	string loaded_waves = ""

	for (ids = 0; ids < nds; ids += 1)
		SetDataFolder file_df
		ds = StringFromList(ids, datasets, ";")
		loaded_waves = psh5_load_dataset(file_df, ds, create_folders=create_folders, reduction_func=reduction_func, reduction_params=reduction_params)
		if (strlen(loaded_waves) > 1)
			loaded_datasets = loaded_datasets + ds + ";"
		endif
	endfor

	setdatafolder save_df
	return loaded_datasets
end

/// load a dataset from an open PShell HDF5 file.
///
/// if the dataset has a maximum of two dimensions, the function loads it at once.
/// if it has more than two dimension, the function calls psh5_load_dataset_slabs() to load the data slab by slab.
///
/// - the metadata (HDF5 attributes) are loaded into the wave note, cf. psh5_load_dataset_meta().
/// - dimension labels are set according the dataset name, cf. ps_set_dimlabels().
///
/// the dataset is loaded into the current data folder or a tree based on the group path given in the datasetpath argument.
/// the function returns from the original data folder.
///
/// only numeric and string data types are supported, string datasets must have rank 1.
///
/// @param	file_df			data folder reference of open HDF5 file from psh5_open_file().
///								if undefined, the current datafolder is assumed.
///
/// @param	datasetpath	group path and name of the dataset, e.g. "/scan 1/ScientaImage".
///								HDF5 groups map to igor data folders below the current data folder,
///								the wave is placed into the leaf folder.
///								the names of groups and waves are cleaned up to produce simple names,
///								in particular, spaces and other illegal characters are removed.
///
/// @param	create_folders	if 1 (default), data folders according to the group path are created.
///									if 0, the dataset is loaded into the current folder.
///
/// @param reduction_func		data reduction function.
///									three-dimensional datasets can be reduced in dimensionality by on-the-fly data reduction.
///									by default (or if empty string), no reduction is applied.
/// 								see @ref psh5_load_dataset_reduced().
///
/// @param reduction_params	parameter string for the reduction function.
///
/// @return	semicolon-separated list of loaded wave names.
///				multiple waves are loaded if the dataset has a compound data type.
///				in that case the wave name is a concatenation of the dataset and field names (see HDF5LoadData).
///
function /s psh5_load_dataset(file_df, datasetpath, [create_folders, reduction_func, reduction_params])
	dfref file_df
	string datasetpath
	variable create_folders
	string reduction_func
	string reduction_params

	dfref base_df = GetDataFolderDFR()
	if (!DataFolderRefStatus(file_df))
		dfref file_df = GetDataFolderDFR()
	endif
	nvar /sdfr=file_df file_id

	if (ParamIsDefault(create_folders))
		create_folders = 1
	endif
	if (create_folders)
		psh5_create_folders(datasetpath)
	endif
	if (ParamIsDefault(reduction_func))
		reduction_func = ""
	endif
	if (ParamIsDefault(reduction_params))
		reduction_params = ""
	endif

	STRUCT HDF5DataInfo di	// Defined in HDF5 Browser.ipf.
	InitHDF5DataInfo(di)
	variable err = HDF5DatasetInfo(file_id, datasetpath, 0, di)
	if (err != 0)
		// error accessing data
		return ""
	endif

	variable numeric = 0
	variable compound = 0

	switch (di.datatype_class)
		case H5T_INTEGER:
		case H5T_FLOAT:
			numeric = 1
			break
		case H5T_STRING:
			numeric = 0
			break
		case H5T_COMPOUND:
			compound = 1
			break
		case H5T_TIME:
		case H5T_BITFIELD:
		case H5T_OPAQUE:
		case H5T_REFERENCE:
		case H5T_ENUM:
		case H5T_VLEN:
		case H5T_ARRAY:
		default:
			// unsupported data type
			return ""
	endswitch

	string wave_names = ""
	if (di.ndims <= 2)
		string datasetname = StringFromList(ItemsInList(datasetpath, "/") - 1, datasetpath, "/")
		variable transpose = WhichListItem(datasetname, kTransposedDatasets) >= 0
		HDF5LoadData /O /Q /Z /TRAN=(transpose) file_id, datasetpath
		wave_names = S_waveNames
	elseif (numeric)
		if (exists(reduction_func) == 6)
			wave_names = psh5_load_dataset_reduced(file_df, datasetpath, $reduction_func, reduction_params, create_folders=0)
		else
			wave_names = psh5_load_dataset_slabs(file_df, datasetpath, create_folders=0)
		endif
	endif

	variable nw = ItemsInList(wave_names, ";")
	variable iw
	string sw
	string loaded_waves = ""
	for (iw = 0; iw < nw; iw += 1)
		sw = StringFromList(iw, wave_names)
		wave /z w = $sw
		if (WaveExists(w))
			loaded_waves = loaded_waves + sw + ";"
			ps_set_dimlabels(w)
			psh5_load_dataset_meta(file_df, datasetpath, w)
		endif
	endfor

	setdatafolder base_df
	return loaded_waves
end

/// load a dataset slab-wise from the open PShell HDF5 file.
///
/// the function loads the dataset image by image using the hyperslab option.
/// the wave is loaded into the current data folder.
///
/// @param	file_df			data folder reference of open HDF5 file from psh5_open_file().
///								if undefined, the current datafolder is assumed.
///
/// @param	datasetpath	group path and name of the dataset.
///								the dataset name defines the name of the loaded wave (after cleaning up).
///
/// @param	progress		select whether a progress window is displayed during the process.
///								@arg	1	(default) show progress window.
///								@arg	0	do not show progress window.
///
/// @param	create_folders	if 1 (default), data folders according to the group path are created.
///									if 0, the dataset is loaded into the current folder.
///
/// @return	semicolon-separated list of loaded wave names.
///				in the current version, the function returns zero or one wave, as it does not support compound types.
///
function /s psh5_load_dataset_slabs(file_df, datasetpath, [create_folders, progress])
	dfref file_df
	string datasetpath
	variable create_folders
	variable progress

	if (ParamIsDefault(create_folders))
		create_folders = 1
	endif
	if (ParamIsDefault(progress))
		progress = 1
	endif

	if (!DataFolderRefStatus(file_df))
		dfref file_df = GetDataFolderDFR()
	endif
	nvar /sdfr=file_df file_id

	variable result = 0
	string datawavename = StringFromList(ItemsInList(datasetpath, "/") - 1, datasetpath, "/")

	if (create_folders)
		psh5_create_folders(datasetpath)
	endif

	STRUCT HDF5DataInfo di	// Defined in HDF5 Browser.ipf.
	InitHDF5DataInfo(di)
	variable err = HDF5DatasetInfo(file_id, datasetpath, 0, di)
	if (err != 0)
		print "error accessing dataset", datasetpath
		return ""
	endif
	if (di.ndims < 2)
		print "error: rank of dataset < 2", datasetpath
		return ""
	elseif (di.ndims < 3)
		progress = 0
	endif
	if ((di.datatype_class != H5T_INTEGER) && (di.datatype_class != H5T_FLOAT))
		print "error: unsupported datatype", datasetpath
		return ""
	endif

	variable idx, idy, idz, idt, izt
	variable transpose = WhichListItem(datawavename, kTransposedDatasets) >= 0
	if (transpose)
		idx = 1
		idy = 0
	else
		idx = 0
		idy = 1
	endif
	idz = 2
	idt = 3

	variable nx, ny, nz, nt, nzt
	nx = di.dims[idx]
	ny = di.dims[idy]
	nz = di.dims[idz]
	nt = di.dims[idt]
	make /n=(nx,ny,nz,nt) /o $datawavename
	wave data = $datawavename

	nz = max(nz, 1)
	nt = max(nt, 1)
	nzt = nz * nt
	izt = 0
	if (progress)
		display_progress_panel("HDF5 Import", "Loading " + datasetpath + "...", nzt)
	endif

	// load data image by image
	HDF5MakeHyperslabWave("slab", max(di.ndims, 4))
	wave slab
	slab[][%Start] = 0
	slab[][%Stride] = 1
	slab[][%Count] = 1
	slab[][%Block] = 1
	slab[idx][%Block] = nx
	slab[idy][%Block] = ny

	variable iz, it
	for (iz = 0; iz < nz; iz += 1)
		for (it = 0; it < nt; it += 1)
			slab[idz][%Start] = iz
			slab[idt][%Start] = it
			HDF5LoadData /O /Q /Z /SLAB=slab /N=slabdata file_id, datasetpath
			wave slabdata // 2D, 3D, or 4D with singletons
			if (transpose)
				data[][][iz][it] = slabdata[q][p][0][0]
			else
				data[][][iz][it] = slabdata[p][q][0][0]
			endif

			// progress window
			izt += 1
			if (progress)
				if (update_progress_panel(izt))
					result = -4 // user abort
					break
				endif
			endif
		endfor
		if (result < 0)
			break
		endif
	endfor

	if (progress)
		kill_progress_panel()
	endif

	killwaves /z slab, slabdata
	if (!result)
		ps_set_dimlabels(data)
		return NameOfWave(data) + ";"
	else
		killwaves /z data
		return ""
	endif
end

// ====== data reduction ======

/// load a dataset with reduced dimensionality
///
/// the function loads the dataset image by image using the hyperslab option
/// and applies a custom reduction function like numeric integration, curve fitting, etc. to each image.
/// the results from the reduction function are written to the `ReducedData1`, `ReducedData2`, etc. waves.
/// the raw data are discarded.
///
/// example reduction functions can be found in the @ref PearlScientaPreprocess module.
/// they must implement the @ref adh5_default_reduction() interface.
///
/// by default, the reduction function is called in separate threads to reduce the total loading time.
/// (psh5_load() reports the total run time in the global variable psh5_perf_secs.)
/// the effect varies depending on the balance between file loading (image size)
/// and data processing (complexity of the reduction function).
///
/// the function loads images (as hyperslabs) one by one and passes them to the reduction function.
/// only a limited number of images are held in the queue at a time to limit memory use.
/// for debugging the reduction function, multi-threading can be disabled
/// (also remove threadsafe attributes from reduce_slab_image() and the reduction function!)
///
/// if the reduction function requires the image waves to be scaled properly,
/// the attributes must have been loaded by psh5_load_scan_attrs() before.
/// in this case, the scales of the result waves are also set by the function.
/// otherwise, the results can also be scaled by ps_scale_dataset() later.
///
/// @param	file_df			data folder reference of open HDF5 file from psh5_open_file().
///								if undefined, the current datafolder is assumed.
///
/// @param scanpath			path to scan group in the HDF5 file.
///
/// @param datasetname		name of the dataset.
///							this must currently be "ScientaImage", other data is not supported.
///							the name of the loaded wave is a cleaned up version of the dataset name.
///							the name can include the region name as a relative path, e.g. "region1/ScientaImage".
///							in this case, the dataset is loaded into a sub-folder named "region1".
///
/// @param reduction_func		custom data reduction function.
/// 							this can be any user-defined function which has the same parameters as @ref adh5_default_reduction.
///							some reduction functions are predefined in the @ref PearlScientaPreprocess module.
///
/// @param reduction_params	parameter string for the reduction function.
///
/// @param	create_folders	if 1 (default), data folders according to the group path are created.
///									if 0, the dataset is loaded into the current folder.
///
/// @param progress			progress window.
///	@arg	1				(default) show progress window
///	@arg	0				do not show progress window
///
/// @param nthreads
///	@arg	-1				(default) use as many threads as there are processor cores (in addition to main thread).
///	@arg	0				use main thread only (for debugging and profiling).
///	@arg	>= 1			use a fixed number of (additional) threads.
///
/// @return					semicolon-separated list of the loaded dataset `ReducedData1`, `ReducedData2`, etc. if successful.
///							auxiliary waves, scan positions, attributes are loaded but not listed in the string.
///							empty string if an error occurred.
///							error messages are printed to the history.
///
function /s psh5_load_dataset_reduced(file_df, datasetpath, reduction_func, reduction_params, [create_folders, progress, nthreads])
	dfref file_df
	string datasetpath
	funcref adh5_default_reduction reduction_func
	string reduction_params
	variable create_folders
	variable progress
	variable nthreads

	if (ParamIsDefault(create_folders))
		create_folders = 1
	endif
	if (ParamIsDefault(progress))
		progress = 1
	endif
	if (ParamIsDefault(nthreads))
		nthreads = -1
	endif

	dfref base_df = GetDataFolderDFR()
	if (!DataFolderRefStatus(file_df))
		dfref file_df = GetDataFolderDFR()
	endif
	nvar /sdfr=file_df file_id

	variable result = 0
	string datawavename = StringFromList(ItemsInList(datasetpath, "/") - 1, datasetpath, "/")
	string wavenames = ""

	if (create_folders)
		psh5_create_folders(datasetpath)
	endif

	STRUCT HDF5DataInfo di	// Defined in HDF5 Browser.ipf.
	InitHDF5DataInfo(di)
	variable err = HDF5DatasetInfo(file_id, datasetpath, 0, di)
	if (err != 0)
		print "error accessing detector/data"
		result = -1
		return wavenames
	endif
	if (di.ndims < 2)
		print "error: rank of dataset < 2"
		result = -2
		return wavenames
	elseif (di.ndims < 3)
		progress = 0
	endif

	variable idx, idy, idz, idt
	variable transpose = WhichListItem(datawavename, kTransposedDatasets) >= 0
	if (transpose)
		idx = 1
		idy = 0
	else
		idx = 0
		idy = 1
	endif
	idz = 2
	idt = 3

	variable nx, ny, nz, nt, nzt
	nx = di.dims[idx]
	ny = di.dims[idy]
	nz = di.dims[idz]
	nt = di.dims[idt]
	// adjust singleton dimensions
	nz = max(nz, 1)
	nt = max(nt, 1)
	nzt = nz * nt

	// load data image by image
	HDF5MakeHyperslabWave("slab", max(di.ndims, 4))
	wave slab
	slab[][%Start] = 0
	slab[][%Stride] = 1
	slab[][%Count] = 1
	slab[][%Block] = 1
	slab[idx][%Block] = nx
	slab[idy][%Block] = ny

	// set up multi threading
	if (nthreads < 0)
		nthreads = ThreadProcessorCount
	endif
	if (nthreads > 0)
		variable threadGroupID = ThreadGroupCreate(nthreads)
		variable ithread
		for (ithread = 0; ithread < nthreads; ithread += 1)
			ThreadStart threadGroupID, ithread, reduce_slab_worker(reduction_func)
		endfor
	else
		make /n=(nzt) /df /free processing_folders
	endif

	if (progress)
		display_progress_panel("PShell Import", "Reducing " + datasetpath + "...", nzt)
	endif

	// create a template wave with the correct scales and labels
	make /n=(nx,ny) /d /o $datawavename
	wave template = $datawavename
	ps_set_dimlabels2(template, datawavename)
	ps_scale_dataset(template)

	variable iz, it, izt
	variable n_sent = 0
	variable n_recvd = 0
	variable tmo = 0
	string dfname
	dfref dfr
	variable iw, nw
	string sw
	make /n=0 /free /wave result_waves

	iz = 0
	it = 0

	do
		// fill the processing queue up to a maximum number of folders
		if (n_sent < max(1, nthreads) * 10 + n_recvd)
			if (iz < nz)
				if (it < nt)
					// load a slab into a temporary folder
					slab[idz][%Start] = iz
					slab[idt][%Start] = it
					dfname = "processing_" + num2str(n_sent)
					NewDataFolder /s $dfname
					HDF5LoadData /O /Q /Z /SLAB=slab /N=slabdata file_id, datasetpath

					duplicate template, image
					variable /g r_index = iz
					variable /g s_index = it
					string /g func_param = reduction_params

					if (nthreads > 0)
						// send to thread group
						WaveClear image
						ThreadGroupPutDF threadGroupID, :
					else
						// process immediately in single-thread mode
						processing_folders[n_sent] = GetDataFolderDFR()
						make /n=1/d profile1, profile2
						wave slabdata
						wave /wave reduced_waves = reduce_slab_image(slabdata, image, reduction_func, func_param)
						variable /g func_result = numpnts(reduced_waves)
						adh5_get_result_waves(reduced_waves, "redw_", 0)
						WaveClear slabdata, image, reduced_waves
						setdatafolder ::
					endif

					iz += 1
					n_sent += 1
					tmo = 0
				else
					iz += 1
					it = 0
				endif
			endif
		else
			// throttle the loop if processing is slow
			tmo = min(100, tmo + 10)
		endif

		// receive a slab from the processing queue
		if (n_recvd < nzt)
			if (nthreads > 0)
				dfr = ThreadGroupGetDFR(threadGroupID, tmo)
			else
				dfr = processing_folders[n_recvd]
				processing_folders[n_recvd] = $""
			endif

			if (DatafolderRefStatus(dfr) != 0)
				// access results folder
				nvar rr = dfr:r_index
				nvar ss = dfr:s_index
				nvar func_result = dfr:func_result

				if (func_result < 1)
					print "error during data reduction."
					result = -3
					break
				endif

				// initialize result waves just once
				if (numpnts(result_waves) == 0)
					redimension /n=(func_result) result_waves
					for (iw = 0; iw < func_result; iw += 1)
						sw = "redw_" + num2str(iw)
						wave profile = dfr:$sw
						sw = "ReducedData" + num2str(iw+1)
						make /n=(dimsize(profile, 0), nz, nt) /d /o $sw
						wave data = $sw
						setdimlabel 0, -1, $getdimlabel(profile, 0, -1), data
						setdimlabel 1, -1, $kScanDimLabel, data
						note data, note(profile)
						ps_scale_dataset(data)
						setscale /p x dimoffset(profile, 0), dimdelta(profile, 0), waveunits(profile, 0), data
						setscale d 0, 0, waveunits(profile, -1), data
						result_waves[iw] = data
					endfor
				endif

				// copy results
				for (iw = 0; iw < func_result; iw += 1)
					sw = "redw_" + num2str(iw)
					wave profile = dfr:$sw
					wave data = result_waves[iw]
					data[][rr][ss] = profile[p]
				endfor

				n_recvd += 1
				KillDataFolder /Z dfr
			endif
		else
			// processing complete
			break
		endif

		// update progress window
		if (progress)
			if (update_progress_panel(n_recvd))
				print "user abort"
				result = -4
				break
			endif
		endif
	while ((n_recvd < nzt) && (result == 0))

	// clean up
	killwaves /z slab, slabdata, template

	if (nthreads > 0)
		variable tstatus = ThreadGroupRelease(threadGroupID)
		if (tstatus == -2)
			print "error: thread did not terminate properly."
			result = -5
		endif
	endif

	// finalize results
	nw = numpnts(result_waves)
	wavenames = ""
	for (iw = 0; iw < nw; iw += 1)
		wave /z data = result_waves[iw]
		if (WaveExists(data))
			if (nz == 1)
				redimension /n=(-1, 0, 0) data
			elseif (nt == 1)
				redimension /n=(-1, nz, 0) data
			endif
			wavenames += nameofwave(data) + ";"
		endif
	endfor

	if (progress)
		kill_progress_panel()
	endif

	setdatafolder base_df
	return wavenames
end


threadsafe static function reduce_slab_worker(reduction_func)
	funcref adh5_default_reduction reduction_func
	do
		// wait for job from main thread
		do
			dfref dfr = ThreadGroupGetDFR(0, 1000)
			if (DataFolderRefStatus(dfr) == 0)
				if (GetRTError(2))
					return 0 // no more jobs
				endif
			else
				break
			endif
		while (1)

		// get input data
		wave slabdata = dfr:slabdata
		wave image = dfr:image
		svar func_param = dfr:func_param
		nvar rr = dfr:r_index
		nvar ss = dfr:s_index

		// do the work
		newdatafolder /s out_df
		variable /g r_index = rr
		variable /g s_index = ss
		wave /wave reduced_waves = reduce_slab_image(slabdata, image, reduction_func, func_param)
		variable /g func_result = numpnts(reduced_waves)

		// send output to queue and clean up
		adh5_get_result_waves(reduced_waves, "redw_", 0)
		WaveClear slabdata, image, reduced_waves
		ThreadGroupPutDF 0, :
		KillDataFolder dfr
	while (1)

	return 0
end

threadsafe static function /wave reduce_slab_image(slabdata, image, reduction_func, reduction_params)
	wave slabdata
	wave image
	funcref adh5_default_reduction reduction_func
	string reduction_params

	image = slabdata[q][p][0][0]

	return reduction_func(image, reduction_params)
end

// ====== meta and auxiliary data ======

/// load organizational metadata from the general group.
///
/// the general group contains the following datasets:
/// authors, pgroup, proposal, proposer, sample.
///
/// data is loaded into the current data folder.
/// all items are loaded into strings, authors is a comma-separated list.
/// missing items default to empty strings.
///
/// @param	file_df			data folder reference of open HDF5 file from psh5_open_file().
///								if undefined, the current datafolder is assumed.
///
/// @return					semicolon-separated list of the objects.
///
function /s psh5_load_general_group(file_df)
	dfref file_df

	if (!DataFolderRefStatus(file_df))
		dfref file_df = GetDataFolderDFR()
	endif
	nvar /sdfr=file_df file_id

	string obj_names = "authors;pgroup;proposal;proposer;sample;"
	variable nn = ItemsInList(obj_names, ";")
	variable ii
	string name

	for (ii = 0; ii < nn; ii += 1)
		name = StringFromList(ii, obj_names, ";")
		psh_load_general_string(file_df, name)
	endfor

	return obj_names
end

/// load a string from the general group.
///
/// the general group contains the following datasets:
/// authors, pgroup, proposal, proposer, sample.
///
/// data is loaded into a global string in the current data folder.
/// arrays with multiple items are loaded into a comma-separated list.
/// a missing item defaults to the empty string.
///
/// @param	file_df			data folder reference of open HDF5 file from psh5_open_file().
///								if undefined, the current datafolder is assumed.
///
/// @return					comma-separated list of values.
///
function /s psh_load_general_string(file_df, name)
	dfref file_df
	string name

	if (!DataFolderRefStatus(file_df))
		dfref file_df = GetDataFolderDFR()
	endif
	nvar /sdfr=file_df file_id

	string path = "/general/" + name
	HDF5LoadData /O /Q /Z /N=wt_load_general /TYPE=1 file_id, path
	string values = ""
	if (!v_flag)
		wave /t wt_load_general
		variable nn = numpnts(wt_load_general)
		variable ii
		for (ii = 0; ii < nn; ii += 1)
			values = AddListItem(wt_load_general[ii], values, ",", inf)
		endfor
		killwaves /z wt_load_general
		if (strlen(values) >= 1)
			values = values[0,strlen(values)-2]
		endif
	endif
	string /g $name = values
	return values
end

/// load metadata of a PShell dataset.
///
/// _metadata_ are the HDF5 attributes attached to a dataset.
/// they are mapped to "key=value" pairs and added to the wave note in separate lines.
/// the following attributes are loaded.
/// names and mappings are hard-coded.
///
/// - "Writable Dimension" -> "ScanDimension"
/// - "Writable Index" -> "WriteableIndex"
/// - "Readable Index" -> "ReadableIndex"
///
/// @param	file_df			data folder reference of open HDF5 file from psh5_open_file().
///								if undefined, the current datafolder is assumed.
///
/// @param	datasetpath	group path and name of the dataset.
///								path separator is the slash "/".
///
/// @param	datawave		metadata is added to the wave note of this wave.
///
/// @return	0 if successful, non-zero if an error occurred.
///
function psh5_load_dataset_meta(file_df, datasetpath, datawave)
	dfref file_df
	string datasetpath
	wave datawave

	if (!DataFolderRefStatus(file_df))
		dfref file_df = GetDataFolderDFR()
	endif
	nvar /sdfr=file_df file_id

	dfref save_df = GetDataFolderDFR()
	SetDataFolder NewFreeDataFolder()

	string wnote

	HDF5LoadData /O /Q /Z /A="Writable Dimension" /N=WriteDim file_id, datasetpath
	if (!v_flag)
		wave WriteDim
		// scan dimension starts at 1
		sprintf wnote, "ScanDimension=%u", WriteDim[0]
		Note datawave, wnote
	endif

	HDF5LoadData /O /Q /Z /A="Writable Index" /N=WriteIndex file_id, datasetpath
	if (!v_flag)
		wave WriteIndex
		sprintf wnote, "WriteableIndex=%u", WriteIndex[0]
		Note datawave, wnote
	endif

	HDF5LoadData /O /Q /Z /A="Readable Index" /N=ReadIndex file_id, datasetpath
	if (!v_flag)
		wave ReadIndex
		sprintf wnote, "ReadableIndex=%u", ReadIndex[0]
		Note datawave, wnote
	endif

	setdatafolder save_df
	return 0
end

/// load metadata of a PShell scan group.
///
/// _metadata_ are the HDF5 attributes attached to the scan group.
/// the following attributes are loaded.
/// the respective wave names under Igor are given in parentheses.
///
/// - Dimensions (ScanDimensions)
/// - Writables (ScanWritables)
/// - Readables (ScanReadables)
/// - Steps (ScanSteps)
/// - Iterations (ScanIterations) - if present (XPSSpectrum script)
/// - Step Size (ScanStepSize) - if present (XPSSpectrum script)
/// - Step Time (ScanStepTime) - if present (XPSSpectrum script)
///
/// if they are missing in the file, `ScanDimensions` and `ScanReadables` are set to default values
/// assuming the file contains a single spectrum.
///
/// data is loaded into the current data folder.
///
/// @param	file_df			data folder reference of open HDF5 file from psh5_open_file().
///								if undefined, the current datafolder is assumed.
///
/// @param	scanpath		path to the scan group in the HDF5 file, e.g. "/scan 1".
///
/// @return					semicolon-separated list of the loaded waves.
///
function /s psh5_load_scan_meta(file_df, scanpath)
	// todo: convert to variables/strings
	dfref file_df
	string scanpath
	string wavenames = ""

	if (!DataFolderRefStatus(file_df))
		dfref file_df = GetDataFolderDFR()
	endif
	nvar /sdfr=file_df file_id

	HDF5LoadData /O /Q /Z /A="Dimensions" /N=ScanDimensions /TYPE=1 file_id, scanpath
	if (!v_flag)
		wavenames = AddListItem(s_wavenames, wavenames, ";", inf)
	else
		make /n=1 /o ScanDimensions
		ScanDimensions = 0
		wavenames = AddListItem("ScanDimensions", wavenames, ";", inf)
	endif
	HDF5LoadData /O /Q /Z /A="Readables" /N=ScanReadables /TYPE=1 file_id, scanpath
	if (!v_flag)
		wavenames = AddListItem(s_wavenames, wavenames, ";", inf)
	else
		make /n=1 /o /t ScanReadables
		ScanReadables[0] = "ScientaSpectrum"
		wavenames = AddListItem("ScanReadables", wavenames, ";", inf)
	endif
	HDF5LoadData /O /Q /Z /A="Writables" /N=ScanWritables /TYPE=1 file_id, scanpath
	if (!v_flag)
		wavenames = AddListItem(s_wavenames, wavenames, ";", inf)
	else
		// OTF script
		HDF5LoadData /O /Q /Z /A="PlotDomain" /N=ScanWritables /TYPE=1 file_id, scanpath
		if (!v_flag)
			wavenames = AddListItem(s_wavenames, wavenames, ";", inf)
		endif
	endif
	HDF5LoadData /O /Q /Z /A="Steps" /N=ScanSteps /TYPE=1 file_id, scanpath
	if (!v_flag)
		wavenames = AddListItem(s_wavenames, wavenames, ";", inf)
	endif
	wavenames = ReplaceString(";;", wavenames, ";")

	// additional attributes from XPSSpectrum.py
	HDF5LoadData /O /Q /Z /A="Iterations" /N=ScanIterations /TYPE=1 file_id, scanpath
	if (!v_flag)
		wavenames = AddListItem(s_wavenames, wavenames, ";", inf)
	endif
	HDF5LoadData /O /Q /Z /A="Step Size" /N=ScanStepSize /TYPE=1 file_id, scanpath
	if (!v_flag)
		wavenames = AddListItem(s_wavenames, wavenames, ";", inf)
	endif
	HDF5LoadData /O /Q /Z /A="Step Time" /N=ScanStepTime /TYPE=1 file_id, scanpath
	if (!v_flag)
		wavenames = AddListItem(s_wavenames, wavenames, ";", inf)
	endif

	return wavenames
end

// ====== dimension scaling ======

/// set dimension labels according to the axis type
///
/// this function asserts a particular ordering of dimensions types
/// based on the name of the wave for
/// ScientaImage, ScientaSpectrum, ImageAngleDistribution, ImageEnergyDistribution.
/// all other waves must be one-dimensional, and the dimension must be the scan dimension.
///
/// dimension labels are required by scaling functions.
///
/// @param	data				data wave as loaded from PShell file
///
/// @return	@arg 0			all labels set successfully.
///			@arg 1			unidentified data source.
///			@arg 2			wave does not contain data.
///
function ps_set_dimlabels(data)
	wave data

	ps_set_dimlabels2(data, NameOfWave(data))
end

/// set dimension labels according to the axis type
///
/// same as ps_set_dimlabels() except that the dimension labels are set
/// according to a separate name argument instead of the wave name.
///
/// @param	data				data wave as loaded from PShell file.
///
/// @param	name			original name of the dataset in the PShell file.
///
/// @return	@arg 0			all labels set successfully.
///			@arg 1			unidentified data source.
///			@arg 2			wave does not contain data.
///
function ps_set_dimlabels2(data, name)
	wave data
	string name

	variable dummy
	try
		// intrinsic dimensions
		strswitch(name)
			case "ScientaImage":
				setdimlabel 0, -1, $kEnergyDimLabel, data
				setdimlabel 1, -1, $kAngleDimLabel, data
				if (WaveDims(data) >= 3)
					setdimlabel 2, -1, $kScanDimLabel, data
				endif
				AbortOnRTE
				break
			case "ImageAngleDistribution":
			case "ScientaAngleDistribution":
				if (WaveDims(data) >= 2)
					setdimlabel 0, -1, $kScanDimLabel, data
					setdimlabel 1, -1, $kAngleDimLabel, data
				else
					setdimlabel 0, -1, $kAngleDimLabel, data
				endif
				AbortOnRTE
				break
			case "ScientaSpectrum":
			case "ImageEnergyDistribution":
			case "ScientaEnergyDistribution":
				if (WaveDims(data) >= 2)
					setdimlabel 0, -1, $kScanDimLabel, data
					setdimlabel 1, -1, $kEnergyDimLabel, data
				else
					setdimlabel 0, -1, $kEnergyDimLabel, data
				endif
				AbortOnRTE
				break
			default:
				if (WaveDims(data) == 1)
					setdimlabel 0, -1, $kScanDimLabel, data
					AbortOnRTE
				else
					return 1
				endif
		endswitch
	catch
		dummy = GetRTError(1)
		return 2
	endtry
	return 0
end

/// find the scan folder of current data
///
/// assuming we are in the data folder (where the scan results, ScientaSpectrum, etc.) are,
/// find the associated scan folder.
/// this can either be the same (usually) or the parent folder (multi-region scans).
///
/// the scan folder is the one that contains the ScanWritables wave.
/// the data and scan folders may refer to the same folder.
///
function /df ps_find_scan_folder(data_df)
	dfref data_df

	wave /z /t /sdfr=data_df ScanWritables=::ScanWritables
	if (WaveExists(ScanWritables))
		string sdf = GetDataFolder(1, data_df)
		dfref parent_df = $(sdf + ":")
		return parent_df
	else
		return data_df
	endif
end

/// find the attributes data folder
///
/// the attributes folder contains diagnostic beamline data at each scan point.
/// the folder can have one of several names due to different pshell versions:
/// "attr", "attrs", or "diags" (from 2022 on).
/// historically, the folder was named "attr" due to the area detector software.
///
/// assuming we are in the scan folder (where the ScanWritables, etc.) are,
/// find the associated attributes folder.
///
function /df ps_find_attr_folder(scan_df)
	dfref scan_df

	dfref diags_df = data_df:diags
	dfref attrs_df = scan_df:attrs
	dfref attr_df = scan_df:attr
	if (DataFolderRefStatus(diags_df))
		return diags_df
	elseif (DataFolderRefStatus(attrs_df))
		return attrs_df
	elseif (DataFolderRefStatus(attr_df))
		return attr_df
	else
		return $""
	endif
end

/// find a wave in scan and attr data folders
///
/// look up a wave by name in the given three data folders.
/// return the first one found.
///
/// @param	df1		first data folder to check
/// @param	df2		second data folder to check
/// @param	df3		third data folder to check
/// @return	wave reference, empty reference if not found
///
function /wave ps_find_scale_wave(name, df1, df2, df3)
	string name
	dfref df1
	dfref df2
	dfref df3

	variable idf
	variable ndf=3
	make /n=(ndf) /df /free dfs
	dfs[0] = {df1, df2, df3}
	for (idf = 0; idf < ndf; idf += 1)
		if (DataFolderRefStatus(dfs[idf]))
			wave /SDFR=dfs[idf] /Z w = $name
			if (WaveExists(w))
				return w
			endif
		endif
	endfor

	return $""
end

/// detect the dimension scales from attributes.
///
/// the function checks the data , scan and attributes folders for scan parameters.
/// the results are written to the provided waves.
/// the function is normally called by ps_scale_datasets() but can also be used independently.
///
/// the data folder contains the waves that are to be scaled.
/// the function looks for the scan positions and diagnostics as necessary.
/// if the scaling data is not found, the scales are not changed.
/// the kEssentialDiags flag can be used with psh5_load() to select the necessary datasets.
///
/// the provided waves are redimensioned by the function, and dimension labels are set.
/// the scale parameters can then be extracted by keyword, e.g.,
/// @arg		`lo[%%energy]`	analyser energy dimension.
/// @arg		`lo[%%angle]`	analyser angle dimension.
/// @arg		`lo[%%scan]`	scan dimension.
/// @arg		`lo[%%data]`		data dimension.
///
/// the function tries to read the following waves, in the data, scan, and attributes/diagnostics folders,
/// where the first folder in the list takes precedence.
/// it may fall back to more or less reasonable default values if no data is not found.
/// @arg		`LensMode`
/// @arg		`ScientaChannelBegin`
/// @arg		`ScientaChannelEnd`
/// @arg		`ScientaSliceBegin`
/// @arg		`ScientaSliceEnd`
/// @arg		`ScanWritables`
/// @arg		wave referenced by `ScanWritables[0]`
///
/// @param	data_df			data folder which contains the waves to be scaled.
///								this is usually the "scan" or "region" folder.
///
/// @param	ax				text wave to receive the axis labels.
///
/// @param	lo				wave to receive the lower limits.
///
/// @param	hi				wave to receive the upper limits.
///
/// @param	un				text wave to receive the unit labels.
///
/// @return					the function results are written to the lo, hi, un, and ax waves.
///
function ps_detect_scale(data_df, ax, lo, hi, un)
	dfref data_df
	wave /t ax
	wave lo
	wave hi
	wave /t un

	dfref scan_df = ps_find_scan_folder(data_df)
	dfref attr_df = ps_find_attr_folder(scan_df)

	redimension /n=4 lo, hi, un, ax
	setdimlabel 0, 0, $kEnergyDimLabel, lo, hi, un, ax
	setdimlabel 0, 1, $kAngleDimLabel, lo, hi, un, ax
	setdimlabel 0, 2, $kScanDimLabel, lo, hi, un, ax
	setdimlabel 0, 3, $kDataDimLabel, lo, hi, un, ax

	// default values
	lo[%$kEnergyDimLabel] = 0
	hi[%$kEnergyDimLabel] = 1
	un[%$kEnergyDimLabel] = "eV"
	ax[%$kEnergyDimLabel] = "Ekin"

	lo[%$kAngleDimLabel] = -1
	hi[%$kAngleDimLabel] = 1
	un[%$kAngleDimLabel] = "arb."
	un[%$kAngleDimLabel] = "slice"

	lo[%$kScanDimLabel] = 0
	hi[%$kScanDimLabel] = 1
	un[%$kScanDimLabel] = "arb."
	ax[%$kScanDimLabel] = "scan"

	lo[%$kDataDimLabel] = 0
	hi[%$kDataDimLabel] = 0
	un[%$kDataDimLabel] = "arb."
	ax[%$kDataDimLabel] = "value"

	wave /T /Z LensMode = ps_find_scale_wave("LensMode", data_df, scan_df, attr_df)
	wave /Z ChannelBegin = ps_find_scale_wave("ScientaChannelBegin", data_df, scan_df, attr_df)
	wave /Z ChannelEnd = ps_find_scale_wave("ScientaChannelEnd", data_df, scan_df, attr_df)
	wave /Z SliceBegin = ps_find_scale_wave("ScientaSliceBegin", data_df, scan_df, attr_df)
	wave /Z SliceEnd = ps_find_scale_wave("ScientaSliceEnd", data_df, scan_df, attr_df)
	wave /Z ScientaChannels = ps_find_scale_wave("ScientaChannels", data_df, scan_df, attr_df)

	// lens mode can give more detail
	if (waveexists(LensMode) && (numpnts(LensMode) >= 1))
		strswitch(LensMode[0])
			case "Angular45":
				lo[%$kAngleDimLabel] = -45/2
				hi[%$kAngleDimLabel] = +45/2
				un[%$kAngleDimLabel] = "°"
				ax[%$kAngleDimLabel] = "angle"
				break
			case "Angular60":
				lo[%$kAngleDimLabel] = -60/2
				hi[%$kAngleDimLabel] = +60/2
				un[%$kAngleDimLabel] = "°"
				ax[%$kAngleDimLabel] = "angle"
				break
			case "Transmission":
				un[%$kAngleDimLabel] = "arb."
				ax[%$kAngleDimLabel] = "offset"
				break
		endswitch
	endif

	// best option if scales are explicit in separate waves
	if (waveexists(ChannelBegin) && waveexists(ChannelEnd) && (numpnts(ChannelBegin) >= 1) && (numpnts(ChannelEnd) >= 1))
		lo[%$kEnergyDimLabel] = ChannelBegin[0]
		hi[%$kEnergyDimLabel] = ChannelEnd[0]
	elseif (waveexists(ScientaChannels) && (numpnts(ScientaChannels) >= 1))
		lo[%$kEnergyDimLabel] = ScientaChannels[0]
		hi[%$kEnergyDimLabel] = ScientaChannels[numpnts(ScientaChannels)-1]
	endif
	if (waveexists(SliceBegin) && waveexists(SliceEnd) && (numpnts(SliceBegin) >= 1) && (numpnts(SliceEnd) >= 1))
		lo[%$kAngleDimLabel] = SliceBegin[0]
		hi[%$kAngleDimLabel] = SliceEnd[0]
	endif

	wave /z /t /SDFR=scan_df ScanWritables
	if (WaveExists(ScanWritables))
		wave /z /SDFR=scan_df scanner = $ScanWritables[0]
		if (!WaveExists(scanner))
			wave /z /SDFR=attr_df scanner = $ScanWritables[0]
		endif
		if (WaveExists(scanner) && (numpnts(scanner) >= 1))
			lo[%$kScanDimLabel] = scanner[0]
			hi[%$kScanDimLabel] = scanner[numpnts(scanner)-1]
			ax[%$kScanDimLabel] = NameOfWave(scanner)
			strswitch(NameOfWave(scanner))
				case "Eph":
					ax[%$kScanDimLabel] = "photon energy"
					un[%$kScanDimLabel] = "eV"
					break
				case "ManipulatorX":
				case "ManipulatorY":
				case "ManipulatorZ":
				case "FocusYTrans":
				case "FocusZTrans":
				case "RefocusYTrans":
				case "RefocusZTrans":
				case "ExitSlitY":
					un[%$kScanDimLabel] = "mm"
					break
				case "ExitSlit":
					un[%$kScanDimLabel] = "µm"
					break
				case "ManipulatorTheta":
				case "ManipulatorTilt":
				case "ManipulatorPhi":
					un[%$kScanDimLabel] = "°"
					break
				case "FocusXRot":
				case "FocusYRot":
				case "FocusZRot":
				case "RefocusXRot":
				case "RefocusYRot":
				case "RefocusZRot":
					un[%$kScanDimLabel] = "mrad"
					break
			endswitch
		endif
	endif
end

/// set the dimension scales of a dataset.
///
/// the function is normally called by ps_scale_datasets() but can also be used independently.
/// the limits and units must be given as function arguments with proper dimension labels.
///
/// the provided limit and unit waves must have dimension labels
/// matching the -1 index dimension labels of the data wave,
/// such as set by the ps_detect_scale() function.
/// the scale parameters are extracted by keyword, e.g.,
/// @arg		`lo[%%energy]`	analyser energy dimension.
/// @arg		`lo[%%angle]`	analyser angle dimension.
/// @arg		`lo[%%scan]`	scan dimension.
/// @arg		`lo[%%data]`		data dimension.
///
/// if the data dimension labels and units are at their defaults ("value" and "arb.", respectively),
/// the function tries to read them from the existing wave note ("AxisLabelD" and "AxisUnitD"),
/// or based on the wave name if the name is one of the known measurement variables:
/// "ScientaImage", "ImageAngleDistribution", "ScientaAngleDistribution",  "ScientaSpectrum", "ImageEnergyDistribution", "ScientaEnergyDistribution",
/// "SampleCurrent", "RefCurrent", "AuxCurrent", "MachineCurrent".
///
/// @param	data				data wave to be scaled.
///							dimension labels (index -1) must be set to match the limit waves.
///
/// @param	ax				axis labels.
///							the axis labels are written to the wave note in the format `AxisLabel%%s=%%s`
///							where `X`, `Y`, `Z`, `D` is substituted for the first place holder
///							and the label for the second one.
///
/// @param	lo				lower limits.
///							the lower limits are applied using the SetScale operation.
///
/// @param	hi				upper limits.
///							the upper limits are applied using the SetScale operation.
///
/// @param	un				unit labels.
///							the unit labels are applied using the SetScale operation.
///
/// @version	this function supports regions from version 1.03.
///
function ps_scale_dataset_2(data, ax, lo, hi, un)
	wave data
	wave /t ax
	wave lo
	wave hi
	wave /t un

	string snote = note(data)
	string sdim
	sdim = GetDimLabel(data, 0, -1)
	if (strlen(sdim))
		setscale /i x lo[%$sdim], hi[%$sdim], un[%$sdim], data
		snote = ReplaceStringByKey("AxisLabelX", snote, ax[%$sdim], "=", "\r")
	endif

	sdim = GetDimLabel(data, 1, -1)
	if (strlen(sdim))
		setscale /i y lo[%$sdim], hi[%$sdim], un[%$sdim], data
		snote = ReplaceStringByKey("AxisLabelY", snote, ax[%$sdim], "=", "\r")
	endif

	sdim = GetDimLabel(data, 2, -1)
	if (strlen(sdim))
		setscale /i z lo[%$sdim], hi[%$sdim], un[%$sdim], data
		snote = ReplaceStringByKey("AxisLabelZ", snote, ax[%$sdim], "=", "\r")
	endif

	string data_unit = un[%$kDataDimLabel]
	string data_label = ax[%$kDataDimLabel]
	string s
	variable def = (cmpstr(data_unit, "arb.") == 0) && (cmpstr(data_label, "value") == 0)

	if (def)
		s = StringByKey("AxisLabelD", snote, "=", "\r")
		if (strlen(s) > 0)
			data_label = s
			def = 0
		endif
		s = StringByKey("AxisUnitD", snote, "=", "\r")
		if (strlen(s) > 0)
			data_unit = s
			def = 0
		endif
	endif

	if (def)
		strswitch(NameOfWave(data))
			case "ScientaImage":
			case "ImageAngleDistribution":
			case "ScientaAngleDistribution":
			case "ScientaSpectrum":
			case "ImageEnergyDistribution":
			case "ScientaEnergyDistribution":
				data_unit = "arb."
				data_label = "intensity"
				def = 0
				break
			case "SampleCurrent":
			case "RefCurrent":
			case "AuxCurrent":
				data_unit = "A"
				data_label = "current"
				def = 0
				break
			case "MachineCurrent":
				data_unit = "mA"
				data_label = "current"
				def = 0
				break
		endswitch
	endif

	setscale d 0, 0, data_unit, data
	snote = ReplaceStringByKey("AxisLabelD", snote, data_label, "=", "\r")
	snote = ReplaceStringByKey("AxisUnitD", snote, data_unit, "=", "\r")
	snote = ReplaceStringByKey("Dataset", snote, NameOfWave(data), "=", "\r")
	note /k data, snote
end

/// set the dimension scales of loaded PShell Scienta datasets according to attributes.
///
/// datasets listed in the ScanReadables waves are scaled
/// according to the attribute waves in the data, scan, and attributes folders,
/// whichever is found first.
///
/// the specified datafolder must contain the ScanReadables wave and the :attr folder.
/// the ScanReadables text wave contains names of the waves to scale.
/// wave names can include a relative path to a sub-folder. the path separator is "/".
///
/// the dimension labels of the dataset waves must have been set correctly, e.g. by ps_set_dimlabels().
/// this is implicitly done by the high-level load functions.
///
/// @param scan_df			scan data folder. must contain the ScanReadables wave.
///
function ps_scale_datasets(scan_df)
	dfref scan_df

	make /n=3 /free lo, hi
	make /n=3 /t /free ax, un
	wave /t /z /SDFR=scan_df ScanReadables
	if (WaveExists(ScanReadables))
		variable isr
		variable nsr = numpnts(ScanReadables)
		variable nel
		string ssr
		string sds
		for (isr = 0; isr < nsr; isr += 1)
			ssr = ScanReadables[isr]
			dfref data_df = psh5_dataset_to_folder(scan_df, ssr)
			if (!DataFolderRefStatus(data_df))
				dfref data_df = scan_df
			endif
			nel = ItemsInList(ssr, "/")
			sds = StringFromList(nel - 1, ssr, "/")
			wave /z /sdfr=data_df wsr=$sds
			if (WaveExists(wsr))
				ps_detect_scale(data_df, ax, lo, hi, un)
				ps_scale_dataset_2(wsr, ax, lo, hi, un)
			endif
		endfor
	endif
end

/// set the dimension scales of a loaded PShell Scienta dataset according to attributes.
///
/// the current datafolder must contain the :attr folder.
/// the data wave can be in the current folder or a sub-folder.
///
/// the dimension labels of the dataset waves must have been set correctly, e.g. by ps_set_dimlabels().
/// this is implicitly done by the high-level load functions.
///
/// the function is useful if a single dataset is loaded and scaled.
/// if multiple datasets are loaded, ps_scale_datasets() is slightly more efficient.
///
/// @param	data				data wave to be scaled.
///							dimension labels (index -1) must be set correctly, cf. ps_set_dimlabels().
///
/// @version	this function supports regions from version 1.03.
///
function ps_scale_dataset(data)
	wave data

	dfref save_df = GetDataFolderDFR()
	dfref data_df = GetWavesDataFolderDFR(data)

	setdatafolder data_df
	make /n=3 /free lo, hi
	make /n=3 /t /free ax, un
	ps_detect_scale(data_df, ax, lo, hi, un)
	ps_scale_dataset_2(data, ax, lo, hi, un)
	setdatafolder save_df
end


// ====== miscellaneous functions ======

/// kill any waves matching a pattern in the experiment
///
/// this may be used to kill big waves of original data before saving.
///
/// example: to kill all ScientaImage waves:
/// ~~~~~~
/// kill_matching_waves($"root:", "ScientaImage", 1)
/// ~~~~~~
///
function /s kill_matching_waves(dfr, pattern, recurse, [killed])
	DFREF dfr
	string pattern
	variable recurse
	string killed

	if (ParamIsDefault(killed))
		killed = ""
	endif

	string s
	string r
	variable index = 0
	do
		Wave/Z w = WaveRefIndexedDFR(dfr, index)
		if (!WaveExists(w))
			break
		endif

		s = NameOfWave(w)
		if (stringmatch(s, pattern))
			killwaves /z w
			killed = AddListItem(s, killed, ";", Inf)
		endif

		index += 1
	while(1)

	if (recurse)
		Variable numChildDataFolders = CountObjectsDFR(dfr, 4)
		Variable i
		for(i=0; i<numChildDataFolders; i+=1)
			String childDFName = GetIndexedObjNameDFR(dfr, 4, i)
			DFREF childDFR = dfr:$childDFName
			killed = kill_matching_waves(childDFR, pattern, 1, killed=killed)
		endfor
	endif

	return killed
End