Files
unarchived_data_fix/unarchived_data_fix.py
2025-06-30 18:53:30 +02:00

161 lines
4.4 KiB
Python
Executable File

#!/usr/bin/env python
import argparse
import json
import re
from pathlib import Path
from tqdm import tqdm
# Instrument names known to this script. The list is used both as the set of
# valid choices for the "instrument" command-line argument and to validate the
# instrument component of the file paths found in scan.json.
INSTRUMENTS = [
    "alvra",
    "bernina",
    "cristallina",
    "diavolezza",
    "maloja",
    "furka",
]
def json_load(filename, *args, **kwargs):
    """Read *filename* and return the decoded JSON document.

    Any extra positional and keyword arguments are forwarded unchanged to
    ``json.load``.

    The file is decoded as UTF-8 explicitly instead of relying on the
    locale's preferred encoding, so the result does not depend on the
    environment the script is started from.
    """
    with open(filename, "r", encoding="utf-8") as f:
        return json.load(f, *args, **kwargs)
def json_save(what, filename, *args, indent=4, sort_keys=False, overwrite=False, **kwargs):
    """Serialize *what* as JSON and write it to *filename*.

    Extra positional and keyword arguments, as well as *indent* and
    *sort_keys*, are forwarded to the JSON encoder.

    If *overwrite* is false (default) the file is opened in exclusive mode
    ("x"), so an already existing file raises ``FileExistsError``. If
    *overwrite* is true the file is opened in "w" mode and replaced.
    """
    mode = "w" if overwrite else "x"
    # Serialize BEFORE touching the file: opening in "w"/"x" mode creates or
    # truncates the target immediately, so an encoder error raised afterwards
    # would leave an empty/partial file behind (and destroy the previous
    # content when overwriting).
    text = json.dumps(what, *args, indent=indent, sort_keys=sort_keys, **kwargs)
    with open(filename, mode, encoding="utf-8") as f:
        f.write(text)
def warn(*args):
    """Print *args* as a message tagged "warning" through cprint."""
    # ANSI SGR code 93 selects the bright-yellow foreground color.
    cprint(args, "warning", "\033[93m")
def warn_dryrun(*args):
    """Print *args* as a message tagged "dry run" through cprint."""
    # ANSI SGR code 92 selects the bright-green foreground color.
    cprint(args, "dry run", "\033[92m")
def warn_overwrite(*args):
    """Print *args* as a message tagged "overwrite protection" through cprint."""
    # ANSI SGR code 91 selects the bright-red foreground color.
    cprint(args, "overwrite protection", "\033[91m")
def cprint(args, prefix, color):
    """Print the items of *args* on one line, prefixed and colored.

    The items are converted with ``str`` and joined by single spaces. The
    resulting line has the form "<prefix>: <items>", wrapped between the
    *color* escape sequence and the ANSI reset sequence.
    """
    message = " ".join(map(str, args))
    # Go through tqdm.write instead of print so that the message does not
    # overlap with an active progress bar.
    tqdm.write(f"{color}{prefix}: {message}\033[0m")
def check_pgroup(string):
    """Return *string* unchanged if it is a pgroup name.

    Intended as an argparse ``type=`` callback.

    Raises:
        argparse.ArgumentTypeError: if is_pgroup rejects *string*.
    """
    if is_pgroup(string):
        return string
    raise argparse.ArgumentTypeError(f'"{string}" is not a pgroup')
def is_pgroup(string):
    """Return a match object if *string* is a pgroup name, otherwise ``None``.

    A pgroup name is the letter "p" followed by exactly five ASCII digits,
    e.g. "p12345". The result is meant to be used in a boolean context.
    """
    # fullmatch instead of match with "^...$": "$" also matches right before a
    # trailing newline, which would let "p12345\n" through. [0-9] instead of
    # \d: in str patterns \d also matches non-ASCII Unicode digits.
    return re.fullmatch(r"p[0-9]{5}", string)
def check_equal(val, ref):
    """Raise ``AssertionError`` unless *val* equals *ref*.

    The error message names both the expected and the actual value.
    Returns ``None`` when the values are equal.
    """
    if val == ref:
        return
    # Raised explicitly rather than via an ``assert`` statement: assert
    # statements are removed when Python runs with -O, which would silently
    # disable this validation.
    raise AssertionError(f'expected "{ref}" but got "{val}"')
# Shown below the generated argparse help. %(prog)s is expanded by argparse
# to the program name.
example_text = """usage examples:
Dry run (nothing is changed or overwritten)
%(prog)s alvra p12345
Create new files called scan_mod.json
%(prog)s alvra p12345 --no-dryrun
Overwrite scan.json
%(prog)s alvra p12345 --no-dryrun --overwrite --inplace
"""

# RawDescriptionHelpFormatter keeps the line breaks of the epilog as written.
parser = argparse.ArgumentParser(
    epilog=example_text,
    formatter_class=argparse.RawDescriptionHelpFormatter,
)

# Positional arguments: where the data lives.
parser.add_argument(
    "instrument",
    choices=INSTRUMENTS,
    help="Which instrument has the data been measured at?",
)
parser.add_argument(
    "pgroup",
    type=check_pgroup,
    help="Which pgroup is the data in?",
)

# Switches: all default to the safe behavior (dry run, separate output file,
# never overwrite).
parser.add_argument(
    "--no-dryrun",
    dest="dryrun",
    action="store_false",
    help="Disable dry run. If dryrun is enabled (default) no files are written.",
)
parser.add_argument(
    "--inplace",
    action="store_true",
    help="Update scan.json in place. If inplace is disabled (default) a new file scan_mod.json is created.",
)
parser.add_argument(
    "--overwrite",
    action="store_true",
    help="Overwrite existing files. If overwrite is disabled (default) existing files will be skipped.",
)

clargs = parser.parse_args()
#print(clargs)
#raise SystemExit
instrument = clargs.instrument
pgroup = clargs.pgroup

# Directory holding the unarchived copy of the raw data: the original
# /sf/<instrument>/data/<pgroup>/raw tree, nested below work/retrieve of the
# same pgroup.
path = Path(f"/sf/{instrument}/data/{pgroup}/work/retrieve/sf/{instrument}/data/{pgroup}/raw")

if not path.is_dir():
    raise SystemExit(f'unarchived directory "{path}" does not exist')

print("working on:", path)
print()

# One scan.json per sub-directory; sorted so the processing order is stable.
fns = sorted(path.glob("*/meta/scan.json"))
# For every scan.json: rewrite each entry of "scan_files" so that it points to
# the unarchived copy of the file, then save the result (unless dry run).
for jfn in tqdm(fns):
    tqdm.write(str(jfn))

    jdat = json_load(jfn)
    scan_files = jdat["scan_files"]

    for i, step_files in enumerate(scan_files):
        for j, fn in enumerate(step_files):
            fn = Path(fn)

            # Expected layout: /sf/<instrument>/data/<pgroup>/raw/<remainder...>
            # The per-file components get their own names (fn_instr, fn_pgroup)
            # so the module-level values taken from the command line are not
            # overwritten while looping.
            try:
                root, sf, fn_instr, data, fn_pgroup, raw, *remainder = fn.parts
            except ValueError:
                # fewer than six components: report it like the other layout
                # errors instead of dying with an unpacking traceback
                raise SystemExit(f'"{fn}" in "{jfn}" has too few path components') from None

            try:
                check_equal(root, "/")
                check_equal(sf, "sf")
                check_equal(data, "data")
                check_equal(raw, "raw")
            except AssertionError as e:
                raise SystemExit(e)

            # Explicit checks with a message: a bare assert would exit with an
            # empty message and would be skipped entirely under "python -O".
            if fn_instr not in INSTRUMENTS:
                raise SystemExit(f'"{fn_instr}" in "{fn}" is not a known instrument')
            if not is_pgroup(fn_pgroup):
                raise SystemExit(f'"{fn_pgroup}" in "{fn}" is not a pgroup')

            # Insert <root>/sf/<instrument>/data/<pgroup>/work/retrieve in
            # front of the original path.
            new_fn = Path(
                root,
                sf, fn_instr, data, fn_pgroup, "work", "retrieve",
                sf, fn_instr, data, fn_pgroup, raw, *remainder,
            )

            if not new_fn.is_file():
                warn(f'unarchived file "{new_fn}" does not exist')
                #TODO: might be better to delete entries where the file is missing

            new_fn = str(new_fn)
            # Internal sanity check: the new path only adds a prefix.
            assert new_fn.endswith(str(fn))
            scan_files[i][j] = new_fn

    new_jfn = jfn.parent / ("scan.json" if clargs.inplace else "scan_mod.json")

    if clargs.dryrun:
        warn_dryrun(f"would save: {new_jfn}")
        continue

    if new_jfn.exists() and not clargs.overwrite:
        warn_overwrite("skipping existing file:", new_jfn, "\nyou might want to set --overwrite")
        continue

    json_save(jdat, new_jfn, overwrite=clargs.overwrite)