epics-pids/rewrite_epicsdata_pids.py

#!/usr/bin/env python

import argparse

# Command-line interface: two required positionals plus an optional override
# for the BS channel. ArgumentDefaultsHelpFormatter makes --help show defaults.
parser = argparse.ArgumentParser(
    formatter_class=argparse.ArgumentDefaultsHelpFormatter,
)
parser.add_argument("instrument")
parser.add_argument("pgroup")
parser.add_argument(
    "-c",
    "--channel",
    default="SAR-CVME-TIFALL4:EvtSet",
    help="BS channel used as source for the timestamp/pulseID mapping",
)
clargs = parser.parse_args()

# Expose the parsed values under the module-level names the rest of the
# script refers to.
instrument, pgroup, bs_source_channel = (
    clargs.instrument,
    clargs.pgroup,
    clargs.channel,
)


import os
from glob import glob
import numpy as np
from scipy.spatial import KDTree
import h5py


class KDTree1D(KDTree):
    """KDTree convenience wrapper for one-dimensional data.

    Flat input is reshaped into a single-column 2D array before it is handed
    to the parent class, both when building the tree and when querying it.
    Any array-like (ndarray, list, scalar, ...) is accepted.
    """

    def __init__(self, arr, *args, **kwargs):
        """Build the tree from the 1D array-like *arr*.

        Extra positional/keyword arguments are forwarded unchanged to
        ``KDTree.__init__``.
        """
        # np.asarray lets plain sequences through; reshape(-1, 1) turns the
        # n values into n points with a single coordinate each.
        arr = np.asarray(arr).reshape(-1, 1)
        super().__init__(arr, *args, **kwargs)

    def query(self, arr, *args, **kwargs):
        """Return ``(distances, indices)`` of the nearest neighbours of *arr*.

        *arr* is a 1D array-like of query values. Extra positional/keyword
        arguments are forwarded unchanged to ``KDTree.query``.
        """
        arr = np.asarray(arr).reshape(-1, 1)
        return super().query(arr, *args, **kwargs)


def get_fn(fns, contains):
    """Return the single entry of *fns* that contains the substring *contains*.

    Args:
        fns: iterable of filenames (strings).
        contains: substring that must occur in exactly one filename.

    Returns:
        The one matching filename.

    Raises:
        AssertionError: if no entry or more than one entry matches.
    """
    matches = [fn for fn in fns if contains in fn]
    if len(matches) != 1:
        # Raised explicitly (not via an ``assert`` statement) so the check is
        # still performed under ``python -O``; the exception type is kept for
        # backward compatibility.
        raise AssertionError(
            "expected exactly one filename containing {!r}, found {}: {}".format(
                contains, len(matches), matches
            )
        )
    return matches[0]


# Main script: for every *.PVCHANNELS.h5 file below the raw folder, write a
# matching *.EPICSDATA.h5 file below res/epics in which every channel sample
# additionally carries the pulse ID of the BS sample nearest in time.
print("globbing")
fns_pattern = "/sf/{}/data/{}/raw/**/*.PVCHANNELS.h5".format(instrument, pgroup)
fns = sorted(glob(fns_pattern, recursive=True))

if not fns:
    raise SystemExit("no file found for: {}".format(fns_pattern))

# BS channel providing the timestamp -> pulse ID mapping, e.g.
# "SAR-CVME-TIFALL4:EvtSet" or "SAR-CVME-TIFALL5:EvtSet". It does not change
# between files, so it is set once outside the loop.
ch_evts = bs_source_channel

for i, fn_pv in enumerate(fns):
    fn_bs = fn_pv.replace(".PVCHANNELS.h5", ".BSDATA.h5")
    new_fn_pv = fn_pv.replace("/raw/", "/res/epics/").replace(".PVCHANNELS.h5", ".EPICSDATA.h5")

    print(i)
    print(fn_bs, fn_pv)
    print("->", new_fn_pv)

    if os.path.exists(new_fn_pv):
        print(new_fn_pv, "exists... skipping")
        continue

    # Load the reference pulse IDs and timestamps from the BS file. A file
    # that cannot be opened or lacks the channel is skipped instead of
    # aborting the whole batch.
    try:
        with h5py.File(fn_bs, "r") as f:
            bs_ps = f[ch_evts + "/pulse_id"][:]
            bs_ts = f[ch_evts + "/timestamp"][:]
    except KeyError:
        print("broken file", fn_bs)
        continue
    except OSError:
        print("cannot open file", fn_bs)
        continue

    # Without any reference timestamp there is no nearest neighbour to map to.
    if not bs_ts.size:
        print("no timestamps in", fn_bs)
        continue

    bs_ts_tree = KDTree1D(bs_ts)

    empty_chs = []

    folder = os.path.dirname(new_fn_pv)
    os.makedirs(folder, exist_ok=True)

    # Write to a temporary file and move it into place only on success.
    # Otherwise an interrupted run would leave a partial output file that the
    # "exists... skipping" check above would never regenerate.
    tmp_fn_pv = new_fn_pv + ".tmp"
    try:
        with h5py.File(fn_pv, "r") as f, h5py.File(tmp_fn_pv, "w") as n:
            # NOTE: the truthiness of an h5py file object is not its
            # emptiness, hence the explicit length check.
            if len(f) == 0:
                print("empty file")
            for ch in f:
                pv_ts = f[ch + "/timestamp"][:]
                if not pv_ts.size:
                    empty_chs.append(ch)
                    continue
                pv_data = f[ch + "/data"][:]

                # Index of the nearest BS timestamp for each PV timestamp,
                # used to look up the corresponding pulse ID.
                _dists, indices = bs_ts_tree.query(pv_ts)
                pv_ps = bs_ps[indices]

                n.create_dataset(ch + "/data",      data=pv_data)
                n.create_dataset(ch + "/timestamp", data=pv_ts)
                n.create_dataset(ch + "/pulse_id",  data=pv_ps)
    except BaseException:
        # Also covers KeyboardInterrupt: never leave a half-written file.
        if os.path.exists(tmp_fn_pv):
            os.remove(tmp_fn_pv)
        raise
    os.replace(tmp_fn_pv, new_fn_pv)

    print("#empty", len(empty_chs))