# bdbase/h5_storage.py

import getpass
import time
import re
from functools import lru_cache
import h5py
import numpy as np

# Variable-length byte-string dtype, used in this module for string datasets
# and for the 'system' attribute of string-array datasets.
dt = h5py.special_dtype(vlen=bytes)

# Dataset dtypes that loadH5Recursive returns as squeezed numpy arrays
# without any further conversion.
numerical_types = tuple(
    np.dtype(type_name)
    for type_name in ('float64', 'float32', 'uint16', 'uint64', 'uint32')
)

def stringDataset(group, name, data, system=None):
    """Create a one-element string dataset called `name` inside `group`.

    The dataset has shape ``(1,)`` and uses the module-level variable-length
    bytes dtype ``dt``.

    Parameters
    ----------
    group
        Object providing ``create_dataset`` (an h5py group or file).
    name
        Name of the dataset to create.
    data
        Value handed to ``create_dataset`` as ``data``.
    system : optional
        When truthy, it is stored on the new dataset via addSystemAttribute.
    """
    new_dset = group.create_dataset(name, (1,), dtype=dt, data=data)
    if not system:
        return
    addSystemAttribute(new_dset, system)

def addStringAttribute(dset_or_group, name, data):
    """Store the string `data` as UTF-8 encoded bytes in attribute `name`.

    Parameters
    ----------
    dset_or_group
        Object with a mapping-like ``attrs`` member (dataset or group).
    name
        Attribute key.
    data : str
        Text to encode; ``bytes(data, 'utf-8')`` is what gets stored.
    """
    encoded = bytes(data, 'utf-8')
    dset_or_group.attrs[name] = encoded

def addSystemAttribute(dset_or_group, data):
    """Store `data` in the 'system' attribute of a dataset or group.

    Thin wrapper around addStringAttribute with the attribute name fixed
    to ``'system'``.
    """
    attribute_name = 'system'
    addStringAttribute(dset_or_group, attribute_name, data)

def add_dataset(group, name, data, system=None, dtype=None):
    """Create dataset `name` in `group` from `data` and optionally tag it.

    Parameters
    ----------
    group
        Object providing ``create_dataset`` (an h5py group or file).
    name
        Name of the dataset to create.
    data
        Content of the dataset. Values whose type is exactly ``str`` are
        delegated to stringDataset; `dtype` is not used for them.
    system : optional
        When truthy and the dataset was created, it is stored on the dataset
        via addSystemAttribute.
    dtype : optional
        Passed on to ``create_dataset`` whenever it is not None.

    Notes
    -----
    Without an explicit `dtype` the creation is best effort: any exception
    from ``create_dataset`` is printed and the dataset is skipped. With an
    explicit `dtype` exceptions propagate to the caller.
    """
    if type(data) is str:
        stringDataset(group, name, data, system)
        return

    # Test against None explicitly instead of relying on truthiness: numpy
    # dtype objects without fields have len() == 0 and are falsy on some NumPy
    # versions, which would silently drop e.g. dtype=np.dtype('float32').
    if dtype is not None:
        dset = group.create_dataset(name, data=data, dtype=dtype)
    else:
        try:
            dset = group.create_dataset(name, data=data)
        except Exception as e:
            # Best effort: report the problem and let the caller continue
            # with the remaining datasets.
            dset = None
            print('Error for dataset %s' % name)
            print('Continuing')
            print(e)

    if dset is not None and system:
        addSystemAttribute(dset, system)

def _save_string_array(group, key, strings):
    """Write a list/array of strings to `group` as a fixed-width 'S<n>' dataset.

    The strings are ASCII-encoded, the dataset keeps the shape of the input
    and gets a 'system' attribute with the value 'unknown'. Any failure
    (e.g. non-ASCII text) is printed and the dataset is skipped.
    """
    try:
        if not isinstance(strings, np.ndarray):
            # Lists and numpy string scalars are converted so that shape and
            # element iteration work the same way for every input.
            strings = np.array(strings)
            print("string to np.str", strings)

        # Encode element-wise on the flattened array; this covers 0-d, 1-d,
        # 2-d and higher-dimensional inputs with a single code path.
        encoded = [item.encode('ascii') for item in strings.ravel()]
        # Reserve at least one byte: empty input or only empty strings would
        # otherwise lead to the zero-width dtype 'S0'.
        max_str_size = max([len(item) for item in encoded] + [1])
        str_dtype = 'S%i' % max_str_size
        payload = np.array(encoded, dtype=str_dtype).reshape(strings.shape)

        dset = group.create_dataset(key, strings.shape, str_dtype, payload)
        dset.attrs.create('system', 'unknown', (1,), dtype=dt)
    except Exception as e:
        print('Exception:', e)
        print('Error', key)
        print(type(strings))
        print('mydata')
        print(strings)


def saveH5Recursive(h5_filename, data_dict, dataH5=None):
    """Save a (nested) dictionary to HDF5.

    Every dict becomes a group, every other value a dataset:

    * ``None`` is stored as the string 'None'.
    * ``str`` values are UTF-8 encoded and stored through add_dataset.
    * Non-empty lists whose first element is a ``str`` and numpy string
      arrays/scalars are stored as fixed-width ASCII datasets.
    * Object arrays with more than one dimension are split into one dataset
      per element of the first two axes, named ``<key>_<i>_<j>``.
    * Everything else is handed to add_dataset unchanged.

    Parameters
    ----------
    h5_filename
        File that is created (mode 'w') when `dataH5` is None. It is not
        used otherwise.
    data_dict : dict
        Data to store; its keys become the top-level entries.
    dataH5 : optional
        Already opened h5py file or group to write into instead of creating
        `h5_filename`.

    Notes
    -----
    Saving is best effort: most errors for individual entries are printed
    and the entry is skipped.
    """

    def is_string_collection(value):
        # Empty lists are explicitly not string collections, so they never
        # get indexed with [0].
        if type(value) is list:
            return len(value) > 0 and type(value[0]) is str
        return hasattr(value, 'dtype') and value.dtype.type is np.str_

    def save_object_array(group, key, objects):
        if len(objects.shape) <= 1:
            add_dataset(group, key, objects, 'unknown')
            return
        # Object arrays cannot be stored as one block; write one dataset per
        # element of the first two axes.
        for i in range(objects.shape[0]):
            for j in range(objects.shape[1]):
                try:
                    add_dataset(group, key + '_%i_%i' % (i, j), objects[i, j], 'unknown')
                except Exception:
                    print('Error')
                    print(group, key, i, j)

    def recurse_save(group, dict_or_data, dict_or_data_name):
        if dict_or_data is None:
            dict_or_data = 'None'
        if group is None:
            raise ValueError("'recurse_save' has been called with None")

        if type(dict_or_data) is dict:
            try:
                new_group = group.create_group(dict_or_data_name)
            except Exception as e:
                print("Error in group.create_group", str(e))
                return
            if new_group is None:
                raise ValueError('create_group returned None for %s' % dict_or_data_name)
            for key, val in dict_or_data.items():
                try:
                    recurse_save(new_group, val, key)
                except ValueError:
                    print('I called recurse_save with None')
            return

        mydata = dict_or_data
        inner_key = dict_or_data_name

        if type(mydata) is str:
            add_dataset(group, inner_key, mydata.encode('utf-8'), 'unknown')
        elif is_string_collection(mydata):
            _save_string_array(group, inner_key, mydata)
        elif hasattr(mydata, 'dtype') and mydata.dtype == np.dtype('O'):
            save_object_array(group, inner_key, mydata)
        else:
            try:
                add_dataset(group, inner_key, mydata, 'unknown')
            except Exception as e:
                print('Error', e)
                print(inner_key, type(mydata))

    def save_all(target):
        for main_key, subdict in data_dict.items():
            recurse_save(target, subdict, main_key)

    if dataH5 is None:
        with h5py.File(h5_filename, 'w') as h5_out:
            save_all(h5_out)
        print("h5_storage.py SAVED TO FILE", h5_filename, flush=True)
    else:
        print("data_dict keys", data_dict.keys())
        save_all(dataH5)
        print("h5_storage.py SAVED TO dataH5", flush=True)


def loadH5Recursive(h5_file):
    """Load an HDF5 file into nested dictionaries.

    Groups become dicts and datasets become values, converted by dtype:

    * object dtype with a plain ``bytes`` value: decoded to ``str``
    * int8/16/32/64: squeezed numpy array of Python-int dtype
    * bool: Python ``bool`` (a message is printed if the conversion fails)
    * other numeric dtypes: squeezed numpy array
    * fixed-width byte strings ('|S...'): ``str`` for a single element,
      otherwise a (nested) list of ``str`` following the squeezed shape
    * remaining object dtype / ``str`` values: returned as read

    Entries that cannot be converted are reported with a printed message
    and left out of the result.

    Parameters
    ----------
    h5_file
        File name (or anything else accepted by ``h5py.File``), opened
        read-only.

    Returns
    -------
    dict
        Content of the file. If the file has a top-level entry called
        'none', the content of that entry is returned instead of the root.
    """
    signed_int_types = (np.dtype('int64'), np.dtype('int32'), np.dtype('int16'), np.dtype('int8'))

    def decode_nested(item):
        # `item` is either bytes or an arbitrarily nested list of bytes.
        if isinstance(item, bytes):
            return item.decode()
        return [decode_nested(sub_item) for sub_item in item]

    def load_dataset(dataset, key, saved_dict_curr):
        dtype = dataset.dtype
        # Read the data once; all branches below work on this value.
        value = dataset[()]

        # Decide on the dtype of the dataset, not on the Python class of the
        # h5py object. Plain bytes are what scalar object-dtype datasets
        # (e.g. variable-length strings) deliver.
        if dtype == np.dtype('O') and type(value) is bytes:
            saved_dict_curr[key] = value.decode()
        elif dtype in signed_int_types:
            saved_dict_curr[key] = np.array(value, int).squeeze()
        elif dtype == np.dtype('bool'):
            try:
                saved_dict_curr[key] = bool(value)
            except Exception:
                print('Could not store key %s with type %s in dict (1)' % (key, dtype))
        elif dtype in numerical_types or np.issubdtype(dtype, np.number):
            # The issubdtype fallback keeps numeric dtypes that are missing
            # from numerical_types (uint8, float16, ...) from being dropped.
            saved_dict_curr[key] = np.array(value).squeeze()
        elif dtype.str.startswith('|S'):
            # Squeezing first means single-element datasets of any shape load
            # as one str; multi-dimensional data becomes nested lists.
            squeezed = np.asarray(value).squeeze()
            saved_dict_curr[key] = decode_nested(squeezed.tolist())
        elif dtype.str == '|O':
            saved_dict_curr[key] = value
        elif type(value) is str:
            saved_dict_curr[key] = value
        else:
            print('Could not store key %s with type %s in dict (2)' % (key, dtype))

    def recurse_load(group_or_val, key, saved_dict_curr):
        # h5py.File is a subclass of h5py.Group, so it has to be tested first.
        if isinstance(group_or_val, h5py.File):
            for new_key, new_group_or_val in group_or_val.items():
                recurse_load(new_group_or_val, new_key, saved_dict_curr)
        elif isinstance(group_or_val, h5py.Group):
            saved_dict_curr[key] = new_dict = {}
            for new_key, new_group_or_val in group_or_val.items():
                recurse_load(new_group_or_val, new_key, new_dict)
        elif isinstance(group_or_val, h5py.Dataset):
            load_dataset(group_or_val, key, saved_dict_curr)
        else:
            print('Could not store key %s with type %s in dict (3)' % (key, type(group_or_val)))

    saved_dict = {}
    with h5py.File(h5_file, 'r') as f:
        if 'none' in f:
            # Files with everything below a top-level 'none' entry are
            # unwrapped, so that the content of that entry is returned.
            recurse_load(f['none'], 'key', saved_dict)
            saved_dict = saved_dict['key']
        else:
            recurse_load(f, 'key', saved_dict)
    return saved_dict

def save_h5_new(saved_dict, h5_file):
    """Write `saved_dict` to `h5_file` in the 'general/experiment/scan 1' layout.

    Created structure:

    * ``general``: user, application, author and creation time
    * ``experiment``: 'Laser rep rate', read via EPICS (NaN if that fails)
    * ``scan 1/method``: records, samples, dimension, type, the content of
      ``saved_dict['Input']`` and, if present, one ``actuators/<magnet>``
      group per magnet with datasets 'K' and 'I-SET'
    * ``scan 1/data/<Profile monitor>``: the content of
      ``saved_dict['Meta_data']`` and ``saved_dict['Raw_data']``

    Raw data keys of the form ``gr_{x,y}_axis_<m>_<i>`` and
    ``gr_{x,y}_fit_gauss_function_<m>_<i>`` are not stored one by one. They
    are combined into one array per kind with shape
    (n_measurements, n_images, longest axis), padded with NaN. A combined
    array that contains only NaN is not written.

    Parameters
    ----------
    saved_dict : dict
        Must contain 'Input', 'Meta_data' and 'Raw_data';
        ``saved_dict['Raw_data']['image']`` provides n_measurements and
        n_images through its first two dimensions. 'Magnet_data' is optional.
    h5_file
        Name of the file to create (mode 'w').

    Raises
    ------
    ValueError
        If 'Input' or 'Meta_data' contain values that are not dict,
        numpy.ndarray or str, or if 'Magnet_data' is missing while
        ``saved_dict['Input']['Dry run']`` is not False.
    """

    def recurse_save(dict_, group, system):
        print('recurse', dict_.keys())
        for key, subdict_or_data in dict_.items():
            type_ = type(subdict_or_data)
            print(key, type_)
            if type_ is dict:
                recurse_save(subdict_or_data, group.create_group(key), system)
            elif type_ is np.ndarray:
                add_dataset(group, key, subdict_or_data, system)
            elif type_ is str:
                add_dataset(group, key, subdict_or_data, system, dtype=dt)
            else:
                raise ValueError(key, type_)

    raw_data = saved_dict['Raw_data']
    n_measurements, n_images = raw_data['image'].shape[:2]

    # The gr_* entries can differ in length between measurements/images, so
    # the combined arrays are sized for the longest axis found. Starting at 0
    # (not -1) keeps the array creation valid when there is no such entry;
    # the empty arrays then count as "all NaN" below and are not written.
    gr_x_shape_max = 0
    gr_y_shape_max = 0
    for key, data in raw_data.items():
        if key.startswith('gr_x_axis'):
            gr_x_shape_max = max(gr_x_shape_max, data.shape[0])
        elif key.startswith('gr_y_axis'):
            gr_y_shape_max = max(gr_y_shape_max, data.shape[0])

    x_shape = [n_measurements, n_images, gr_x_shape_max]
    y_shape = [n_measurements, n_images, gr_y_shape_max]
    # Insertion order is the order in which the datasets are written.
    combined = {
        'gr_x_axis': np.full(x_shape, np.nan),
        'gr_y_axis': np.full(y_shape, np.nan),
        'gr_x_fit_gauss_function': np.full(x_shape, np.nan),
        'gr_y_fit_gauss_function': np.full(y_shape, np.nan),
    }
    # One pattern per combined array; the two groups are the measurement and
    # image index.
    patterns = {name: re.compile(r'%s_(\d+)_(\d+)' % name) for name in combined}

    for key, data in sorted(raw_data.items()):
        for name, pattern in patterns.items():
            match = pattern.match(key)
            if match is None:
                continue
            n_measurement, n_image = map(int, match.groups())
            combined[name][n_measurement, n_image, :len(data)] = data
            # The patterns have distinct prefixes, so no other one can match.
            break

    with h5py.File(h5_file, 'w') as f:
        general = f.create_group('general')
        stringDataset(general, 'user', getpass.getuser())
        stringDataset(general, 'application', 'EmittanceTool')
        stringDataset(general, 'author', 'Philipp Dijkstal and Eduard Prat')
        stringDataset(general, 'created', time.ctime())

        experiment = f.create_group('experiment')
        try:
            # Imported here so that the module also works without pyepics.
            from epics import caget
            lrr = float(caget('SIN-TIMAST-TMA:Beam-Exp-Freq-RB'))
        except Exception as e:
            print('Could not obtain Laser rep rate!')
            print(e)
            lrr = np.nan
        add_dataset(experiment, 'Laser rep rate', lrr, 'unknown')
        # TBD: save snapshot here

        scan1 = f.create_group('scan 1')

        method = scan1.create_group('method')
        method.create_dataset('records', data=[float(n_measurements)])
        method.create_dataset('samples', data=[float(n_images)])
        method.create_dataset('dimension', data=[1])
        stringDataset(method, 'type', 'Line scan')
        recurse_save(saved_dict['Input'], method, 'Application Input')

        data = scan1.create_group('data')

        screen = data.create_group(saved_dict['Input']['Profile monitor'])
        recurse_save(saved_dict['Meta_data'], screen, 'Emittance data')

        # Everything that is not part of a combined array is stored as is.
        for key, data_ in sorted(raw_data.items()):
            if not any(pattern.match(key) for pattern in patterns.values()):
                add_dataset(screen, key, data_, 'Camera')

        for name, array in combined.items():
            if not np.all(np.isnan(array)):
                add_dataset(screen, name, array, 'Camera')
            else:
                print('%s is nan' % name)

        if 'Magnet_data' in saved_dict:
            magnet_data = saved_dict['Magnet_data']
            for n_magnet, magnet in enumerate(magnet_data['Magnets']):
                mag_group = method.create_group('actuators/%s' % magnet)
                add_dataset(mag_group, 'K', magnet_data['K'][n_magnet], 'Magnet')
                add_dataset(mag_group, 'I-SET', magnet_data['I-SET'][n_magnet], 'Magnet')
        elif saved_dict['Input']['Dry run'] not in (np.array(False), False):
            raise ValueError('No magnet data')
        else:
            print('Magnet data not saved.')