#!/usr/bin/env python
"""Attach pulse IDs to EPICS PV data by nearest-timestamp matching.

For every ``*.PVCHANNELS.h5`` file found (recursively) below
``/sf/<instrument>/data/<pgroup>/raw/``, the timestamps of each PV channel are
matched to the nearest timestamp of a reference channel stored in the sibling
``*.BSDATA.h5`` file.  The pulse ID recorded for that nearest reference
timestamp is written, together with the unchanged PV data and timestamps, to a
new ``*.EPICSDATA.h5`` file whose path is the input path with ``/raw/``
replaced by ``/res/epics/``.

Output files that already exist are skipped, so the script can be re-run.
"""

import argparse

parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
parser.add_argument("instrument")
parser.add_argument("pgroup")
parser.add_argument(
    "-c",
    "--channel",
    default="SAR-CVME-TIFALL4:EvtSet",
    help="BS channel used as source for the timestamp/pulseID mapping",
)

clargs = parser.parse_args()

instrument = clargs.instrument
pgroup = clargs.pgroup
bs_source_channel = clargs.channel

# NOTE(review): the remaining imports intentionally stay below the argument
# parsing, as in the original layout -- presumably so that --help and usage
# errors do not pay for (or depend on) the scientific stack. Confirm before
# moving them to the top of the file.
import os
from glob import glob

import numpy as np
from scipy.spatial import KDTree
import h5py


class KDTree1D(KDTree):
    """KDTree over a one-dimensional array of values.

    KDTree works on 2D (n_points, n_dims) input; this wrapper reshapes flat
    arrays into a single column on construction and on query.
    """

    def __init__(self, arr):
        """Build the tree from the values in *arr* (any shape, flattened)."""
        arr = np.asarray(arr).reshape(-1, 1)
        super().__init__(arr)

    def query(self, arr, *args, **kwargs):
        """Return ``(distances, indices)`` of the nearest tree entries.

        *arr* is flattened into a column of query points. Any further
        positional or keyword arguments are forwarded unchanged to
        ``KDTree.query``.
        """
        arr = np.asarray(arr).reshape(-1, 1)
        return super().query(arr, *args, **kwargs)


def get_fn(fns, contains):
    """Return the single entry of *fns* that contains the substring *contains*.

    Raises AssertionError if there is not exactly one such entry.
    """
    matches = [fn for fn in fns if contains in fn]
    assert len(matches) == 1, "expected exactly one file name containing {!r}, found {}".format(
        contains, len(matches)
    )
    return matches[0]


def _read_bs_mapping(fn_bs, ch_evts):
    """Read the pulse IDs and timestamps of channel *ch_evts* from *fn_bs*.

    Returns ``(pulse_ids, timestamps)``, or ``None`` if the file cannot be
    opened, does not contain the channel, or the channel holds no timestamps
    (nothing to match against).
    """
    try:
        with h5py.File(fn_bs, "r") as f:
            bs_ps = f[ch_evts + "/pulse_id"][:]
            bs_ts = f[ch_evts + "/timestamp"][:]
    except (KeyError, OSError):
        # KeyError: channel missing; OSError: file missing or not readable HDF5
        return None
    if not bs_ts.size:
        return None
    return bs_ps, bs_ts


def _write_epics_file(fn_pv, new_fn_pv, bs_ts_tree, bs_ps):
    """Copy all channels of *fn_pv* into *new_fn_pv*, adding pulse IDs.

    For each channel, the pulse ID of the nearest timestamp in *bs_ts_tree*
    is looked up in *bs_ps*. Channels without any timestamps are not written.

    Returns the list of channel names that were skipped because they were
    empty. If anything fails while writing, the partially written output file
    is removed before the exception is re-raised, so that a later run does not
    mistake it for a finished result.
    """
    empty_chs = []
    with h5py.File(fn_pv, "r") as f:
        # mode "x" fails if the file already exists; in that case nothing was
        # created by us and there is nothing to clean up
        n = h5py.File(new_fn_pv, "x")
        try:
            with n:
                if not len(f):
                    print("empty file")
                for ch in f:
                    pv_data = f[ch + "/data"][:]
                    pv_ts = f[ch + "/timestamp"][:]
                    if not pv_ts.size:
                        empty_chs.append(ch)
                        continue
                    _dists, indices = bs_ts_tree.query(pv_ts)
                    pv_ps = bs_ps[indices]
                    n.create_dataset(ch + "/data", data=pv_data)
                    n.create_dataset(ch + "/timestamp", data=pv_ts)
                    n.create_dataset(ch + "/pulse_id", data=pv_ps)
        except BaseException:
            # also covers KeyboardInterrupt: never leave a truncated output
            os.remove(new_fn_pv)
            raise
    return empty_chs


print("globbing")
fns_pattern = "/sf/{}/data/{}/raw/**/*.PVCHANNELS.h5".format(instrument, pgroup)
fns = sorted(glob(fns_pattern, recursive=True))

if not fns:
    raise SystemExit("no file found for: {}".format(fns_pattern))

# reference channel providing the timestamp -> pulse ID mapping (see --channel)
ch_evts = bs_source_channel

for i, fn_pv in enumerate(fns):
    fn_bs = fn_pv.replace(".PVCHANNELS.h5", ".BSDATA.h5")
    new_fn_pv = fn_pv.replace("/raw/", "/res/epics/").replace(".PVCHANNELS.h5", ".EPICSDATA.h5")

    folder = os.path.dirname(new_fn_pv)
    os.makedirs(folder, exist_ok=True)

    print(i)
    print(fn_bs, fn_pv)
    print("->", new_fn_pv)

    if os.path.exists(new_fn_pv):
        print(new_fn_pv, "exists... skipping")
        continue

    bs_mapping = _read_bs_mapping(fn_bs, ch_evts)
    if bs_mapping is None:
        print("broken file", fn_bs)
        continue

    bs_ps, bs_ts = bs_mapping
    bs_ts_tree = KDTree1D(bs_ts)

    empty_chs = _write_epics_file(fn_pv, new_fn_pv, bs_ts_tree, bs_ps)

    print("#empty", len(empty_chs))