#!/usr/bin/env python
"""Point the scan.json files of retrieved (unarchived) data at the retrieved files.

For the given instrument and pgroup, every ``*/meta/scan.json`` below

    /sf/<instrument>/data/<pgroup>/work/retrieve/sf/<instrument>/data/<pgroup>/raw

is loaded and each entry of its ``"scan_files"`` list of lists is rewritten from

    /sf/<instrument>/data/<pgroup>/raw/...

to the corresponding location below ``work/retrieve``.

The result is written next to the input as ``scan_mod.json``, or as
``scan.json`` with ``--inplace``. By default nothing is written (dry run);
pass ``--no-dryrun`` to save. Existing output files are skipped unless
``--overwrite`` is given, so ``--inplace`` always needs ``--overwrite``.
"""

# Provenance: added as unarchived_data_fix.py in commit
# f7a104580d7d72c39b318d94aecd1513ea57df7d ("first try"),
# Sven Augustin, Thu, 26 Oct 2023 12:38:07 +0200.

import argparse
import json
import re
from pathlib import Path

try:
    from tqdm import tqdm
except ImportError:
    # tqdm only provides the progress bar; fall back to plain printing without it
    tqdm = None


INSTRUMENTS = [
    "alvra",
    "bernina",
    "cristallina",
    "diavolezza",
    "maloja",
    "furka",
]


def json_load(filename, *args, **kwargs):
    """Return the parsed content of the JSON file *filename*.

    Extra positional and keyword arguments are passed on to ``json.load``.
    """
    with open(filename, "r") as f:
        return json.load(f, *args, **kwargs)


def json_save(what, filename, *args, indent=4, sort_keys=False, overwrite=False, **kwargs):
    """Write *what* as JSON to *filename*.

    Unless *overwrite* is true the file is opened in exclusive-creation
    mode, so an existing file raises ``FileExistsError`` instead of being
    replaced. Remaining arguments are passed on to ``json.dump``.
    """
    mode = "w" if overwrite else "x"
    with open(filename, mode) as f:
        json.dump(what, f, *args, indent=indent, sort_keys=sort_keys, **kwargs)


def warn(*args):
    """Print *args* as an orange "warning" message."""
    orange = "\033[93m"
    cprint(args, "warning", orange)


def warn_dryrun(*args):
    """Print *args* as a green "dry run" message."""
    green = "\033[92m"
    cprint(args, "dry run", green)


def warn_overwrite(*args):
    """Print *args* as a red "overwrite protection" message."""
    red = "\033[91m"
    cprint(args, "overwrite protection", red)


def cprint(args, prefix, color):
    """Print the items of *args*, space-joined, as "<prefix>: ..." in *color*.

    *color* is an ANSI escape sequence; the color is reset at the end of
    the message.
    """
    clear = "\033[0m"
    s = " ".join(str(i) for i in args)
    s = f"{color}{prefix}: {s}{clear}"
    _write(s)


def _write(line):
    """Print *line*, going through tqdm (if available) so a progress bar stays intact."""
    if tqdm is None:
        print(line)
    else:
        tqdm.write(line)


def _progress(iterable):
    """Wrap *iterable* in a tqdm progress bar if tqdm is available."""
    return iterable if tqdm is None else tqdm(iterable)


def check_pgroup(string):
    """argparse ``type=`` callback: return *string* if it is a pgroup.

    Raises ``argparse.ArgumentTypeError`` otherwise.
    """
    if not is_pgroup(string):
        raise argparse.ArgumentTypeError(f'"{string}" is not a pgroup')
    return string


def is_pgroup(string):
    """Return a match object if *string* is "p" followed by five digits, else None."""
    # fullmatch (instead of match with "$") also rejects a trailing newline
    return re.fullmatch(r"p\d{5}", string)


def check_equal(val, ref):
    """Raise ``AssertionError`` unless *val* equals *ref*."""
    # raised explicitly (not via ``assert``) so the check survives ``python -O``
    if val != ref:
        raise AssertionError(f'expected "{ref}" but got "{val}"')


def to_retrieved_path(fn):
    """Map an original raw-data path to its location below ``work/retrieve``.

    ``/sf/<instr>/data/<pgroup>/raw/<rest>`` becomes
    ``/sf/<instr>/data/<pgroup>/work/retrieve/sf/<instr>/data/<pgroup>/raw/<rest>``.

    Returns the new path as a ``Path``. Raises ``ValueError`` if *fn* does
    not have the expected layout, e.g. because it is too short, names an
    unknown instrument, or has already been rewritten.
    """
    fn = Path(fn)
    parts = fn.parts
    if len(parts) < 6:
        raise ValueError(f'"{fn}" is too short to be a raw data path')
    root, sf, instr, data, pgroup, raw, *remainder = parts

    try:
        check_equal(root, "/")
        check_equal(sf, "sf")
        check_equal(data, "data")
        check_equal(raw, "raw")
    except AssertionError as e:
        raise ValueError(f'"{fn}": {e}') from None

    if instr not in INSTRUMENTS:
        raise ValueError(f'"{fn}": unknown instrument "{instr}"')
    if not is_pgroup(pgroup):
        raise ValueError(f'"{fn}": "{pgroup}" is not a pgroup')

    return Path(
        root,
        sf, instr, data, pgroup, "work", "retrieve",
        sf, instr, data, pgroup, raw, *remainder,
    )


def build_parser():
    """Return the command line parser of this script."""
    parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)

    parser.add_argument("instrument", choices=INSTRUMENTS)
    parser.add_argument("pgroup", type=check_pgroup)

    parser.add_argument("--no-dryrun", dest="dryrun", action="store_false")
    parser.add_argument("--inplace", action="store_true")
    parser.add_argument("--overwrite", action="store_true")

    return parser


def main(argv=None):
    """Run the script; *argv* defaults to the process command line.

    Exits via ``SystemExit`` if the unarchived directory does not exist or
    if a scan file entry does not look like a raw data path.
    """
    clargs = build_parser().parse_args(argv)

    instrument = clargs.instrument
    pgroup = clargs.pgroup

    path = Path(f"/sf/{instrument}/data/{pgroup}/work/retrieve/sf/{instrument}/data/{pgroup}/raw")
    if not path.is_dir():
        raise SystemExit(f'unarchived directory "{path}" does not exist')

    print("working on:", path)
    print()

    fns = sorted(path.glob("*/meta/scan.json"))

    for jfn in _progress(fns):
        _write(str(jfn))
        jdat = json_load(jfn)

        for step_files in jdat["scan_files"]:
            for j, fn in enumerate(step_files):
                try:
                    new_fn = to_retrieved_path(fn)
                except ValueError as e:
                    raise SystemExit(f"{jfn}: {e}") from None

                if not new_fn.is_file():
                    warn(f'unarchived file "{new_fn}" does not exist')
                    # TODO: might be better to delete entries where the file is missing

                # step_files is the list stored in jdat, so this updates jdat in place
                step_files[j] = str(new_fn)

        new_jfn = jfn.parent / ("scan.json" if clargs.inplace else "scan_mod.json")

        if clargs.dryrun:
            warn_dryrun(f"would save: {new_jfn}")
            continue

        if new_jfn.exists() and not clargs.overwrite:
            warn_overwrite("skipping existing file:", new_jfn, "\nyou might want to set --overwrite")
            continue

        json_save(jdat, new_jfn, overwrite=clargs.overwrite)


if __name__ == "__main__":
    main()