#!/usr/bin/env python
"""Point the file lists in scan.json at the retrieved copy of the raw data.

For every ``*/meta/scan.json`` found below
``/sf/<instrument>/data/<pgroup>/work/retrieve/sf/<instrument>/data/<pgroup>/raw``
each entry of ``scan_files`` is rewritten from
``/sf/<instr>/data/<pgroup>/raw/...`` to
``/sf/<instr>/data/<pgroup>/work/retrieve/sf/<instr>/data/<pgroup>/raw/...``.
By default nothing is written (dry run); see the command line help.
"""

import argparse
import json
import re
from pathlib import Path

try:
    from tqdm import tqdm
except ImportError:
    # The progress bar is purely cosmetic: without tqdm fall back to plain
    # iteration and plain printing so the script stays usable.
    def tqdm(iterable, *args, **kwargs):
        return iterable

    tqdm.write = print


INSTRUMENTS = [
    "alvra",
    "bernina",
    "cristallina",
    "diavolezza",
    "maloja",
    "furka"
]


def json_load(filename, *args, **kwargs):
    """Return the decoded content of the JSON file *filename*.

    Additional arguments are forwarded to ``json.load``.
    """
    with open(filename, "r", encoding="utf-8") as f:
        return json.load(f, *args, **kwargs)


def json_save(what, filename, *args, indent=4, sort_keys=False, overwrite=False, **kwargs):
    """Write *what* as JSON to *filename*.

    Unless *overwrite* is true the file is opened in exclusive mode, so an
    existing file raises ``FileExistsError`` instead of being replaced.
    Additional arguments are forwarded to ``json.dump``.
    """
    mode = "w" if overwrite else "x"
    with open(filename, mode, encoding="utf-8") as f:
        json.dump(what, f, *args, indent=indent, sort_keys=sort_keys, **kwargs)


def warn(*args):
    """Print *args* as a colored message prefixed with "warning"."""
    orange = "\033[93m"
    cprint(args, "warning", orange)


def warn_dryrun(*args):
    """Print *args* as a colored message prefixed with "dry run"."""
    green = "\033[92m"
    cprint(args, "dry run", green)


def warn_overwrite(*args):
    """Print *args* as a colored message prefixed with "overwrite protection"."""
    red = "\033[91m"
    cprint(args, "overwrite protection", red)


def cprint(args, prefix, color):
    """Join *args* with spaces and emit them as "<prefix>: <text>" in *color*.

    The text goes through ``tqdm.write`` so it does not break the progress bar.
    """
    clear = "\033[0m"
    s = " ".join(str(i) for i in args)
    s = f"{color}{prefix}: {s}{clear}"
    tqdm.write(s)


def check_pgroup(string):
    """argparse ``type=`` callback: return *string* if it is a pgroup.

    Raises:
        argparse.ArgumentTypeError: if *string* is not a pgroup.
    """
    if not is_pgroup(string):
        raise argparse.ArgumentTypeError(f'"{string}" is not a pgroup')
    return string


def is_pgroup(string):
    """Return a (truthy) match object if *string* is "p" plus five digits, else None."""
    # fullmatch instead of match + "$": "$" would also accept a trailing newline.
    return re.fullmatch(r"p\d{5}", string)


def check_equal(val, ref):
    """Raise ``AssertionError`` unless *val* equals *ref*."""
    # Raised explicitly rather than via an assert statement, which would be
    # stripped (and the check silently skipped) under "python -O".
    if val != ref:
        raise AssertionError(f'expected "{ref}" but got "{val}"')


example_text = """usage examples:

  Dry run (nothing is changed or overwritten)
  %(prog)s alvra p12345

  Create new files called scan_mod.json
  %(prog)s alvra p12345 --no-dryrun

  Overwrite scan.json
  %(prog)s alvra p12345 --no-dryrun --overwrite --inplace
"""


def _build_parser():
    """Create the command line parser."""
    parser = argparse.ArgumentParser(
        epilog=example_text,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument(
        "instrument", choices=INSTRUMENTS,
        help="Which instrument has the data been measured at?",
    )
    parser.add_argument(
        "pgroup", type=check_pgroup,
        help="Which pgroup is the data in?",
    )
    parser.add_argument(
        "--no-dryrun", dest="dryrun", action="store_false",
        help="Disable dry run. If dryrun is enabled (default) no files are written.",
    )
    parser.add_argument(
        "--inplace", action="store_true",
        help="Update scan.json in place. If inplace is disabled (default) a new file scan_mod.json is created.",
    )
    parser.add_argument(
        "--overwrite", action="store_true",
        help="Overwrite existing files. If overwrite is disabled (default) existing files will be skipped.",
    )
    return parser


def _retrieved_path(fn):
    """Return the Path of *fn* below the "work/retrieve" directory of its pgroup.

    *fn* must look like ``/sf/<instrument>/data/<pgroup>/raw/...``. The result
    ends with the original path by construction.

    Raises:
        AssertionError: if one of the fixed path components differs (check_equal).
        ValueError: if the path is too short or names an unknown instrument
            or an invalid pgroup.
    """
    parts = Path(fn).parts
    if len(parts) < 6:
        raise ValueError(f'"{fn}" is too short to be a raw data path')

    root, sf, instr, data, pgroup, raw, *remainder = parts
    check_equal(root, "/")
    check_equal(sf, "sf")
    check_equal(data, "data")
    check_equal(raw, "raw")
    if instr not in INSTRUMENTS:
        raise ValueError(f'unknown instrument "{instr}" in "{fn}"')
    if not is_pgroup(pgroup):
        raise ValueError(f'"{pgroup}" in "{fn}" is not a pgroup')

    return Path(
        root, sf, instr, data, pgroup,
        "work", "retrieve",
        sf, instr, data, pgroup, raw, *remainder
    )


def _redirect_scan_files(scan_files):
    """Replace, in place, every file name in *scan_files* by its retrieved path.

    *scan_files* is a list of per-step lists of file names. A warning is
    printed for each new path that is not an existing file; the entry is
    rewritten regardless.
    """
    for step_files in scan_files:
        for j, fn in enumerate(step_files):
            new_fn = _retrieved_path(fn)
            if not new_fn.is_file():
                warn(f'unarchived file "{new_fn}" does not exist')
                #TODO: might be better to delete entries where the file is missing
            step_files[j] = str(new_fn)


def main(argv=None):
    """Run the command line tool; *argv* defaults to ``sys.argv[1:]``.

    Raises:
        SystemExit: if the retrieve directory is missing or a scan.json
            contains a file name of unexpected form.
    """
    clargs = _build_parser().parse_args(argv)

    instrument = clargs.instrument
    pgroup = clargs.pgroup

    path = Path(f"/sf/{instrument}/data/{pgroup}/work/retrieve/sf/{instrument}/data/{pgroup}/raw")
    if not path.is_dir():
        raise SystemExit(f'unarchived directory "{path}" does not exist')

    print("working on:", path)
    print()

    new_name = "scan.json" if clargs.inplace else "scan_mod.json"

    for jfn in tqdm(sorted(path.glob("*/meta/scan.json"))):
        tqdm.write(str(jfn))
        jdat = json_load(jfn)

        try:
            _redirect_scan_files(jdat["scan_files"])
        except (AssertionError, ValueError) as e:
            raise SystemExit(e) from e

        new_jfn = jfn.parent / new_name

        if clargs.dryrun:
            warn_dryrun(f"would save: {new_jfn}")
            continue

        # Let the exclusive open in json_save detect an existing file; this
        # avoids a race between a separate existence check and the write.
        try:
            json_save(jdat, new_jfn, overwrite=clargs.overwrite)
        except FileExistsError:
            warn_overwrite("skipping existing file:", new_jfn, "\nyou might want to set --overwrite")


if __name__ == "__main__":
    main()