Files
elog2scilog/e2sci.py

157 lines
3.9 KiB
Python
Executable File

#!/usr/bin/env python
import argparse
# Command-line interface. Every option has a default; the defaults are shown
# in the --help output because of ArgumentDefaultsHelpFormatter.
parser = argparse.ArgumentParser(
    description="Convert an elog dump to a scilog json",
    formatter_class=argparse.ArgumentDefaultsHelpFormatter,
)
parser.add_argument(
    "-d", "--dump",
    default="dump",
    help="Folder containing the elog dump",
)
parser.add_argument(
    "-a", "--authors",
    default="authors.json",
    help="JSON file containing the author mapping",
)
parser.add_argument(
    "-o", "--output",
    default="scilog.json",
    help="Output file name",
)
parser.add_argument(
    "-u", "--default-user",
    default="default",
    help="Default user",
)
parser.add_argument(
    "-p", "--default-pgroup",
    default="default",
    help="Default pgroup",
)
clargs = parser.parse_args()

# Unpack the parsed options into the module-level names used by the rest of
# the script.
fn_authors, fn_dump, fn_output = clargs.authors, clargs.dump, clargs.output
default_user, default_pgroup = clargs.default_user, clargs.default_pgroup
from glob import iglob
import json
def json_load(filename, *args, **kwargs):
    """Read the JSON document stored in *filename* and return the decoded object.

    The file is always decoded as UTF-8 (the encoding JSON files are expected
    to use) instead of the platform's locale encoding, so non-ASCII text is
    read the same way on every system.

    Any extra arguments are forwarded unchanged to ``json.load``; note that
    ``json.load`` itself only accepts its options as keyword arguments.
    """
    with open(filename, "r", encoding="utf-8") as f:
        return json.load(f, *args, **kwargs)
def json_save(what, filename, *args, indent=4, sort_keys=True, **kwargs):
    """Serialize *what* as JSON into *filename*, replacing any existing file.

    By default the output is pretty-printed with an indent of 4 and with
    dictionary keys sorted; both can be overridden via *indent* and
    *sort_keys*.

    The file is always written as UTF-8 instead of the platform's locale
    encoding, so the result stays valid when a caller passes
    ``ensure_ascii=False``.

    Any extra arguments are forwarded unchanged to ``json.dump``; note that
    ``json.dump`` itself only accepts its options as keyword arguments.
    """
    with open(filename, "w", encoding="utf-8") as f:
        json.dump(what, f, *args, indent=indent, sort_keys=sort_keys, **kwargs)
# Keys whose value is copied unchanged into the output under a new name
# (elog key -> scilog key). "Author", "P-Group" and "message" are absent on
# purpose: they feed into "createdBy", "ownerGroup" and "textcontent", but
# need extra processing and are therefore listed in `special` instead.
mapped = {
    "Date": "timestamp",
}

# Keys that are dropped without being written to the output.
ignored = [
    "Encoding",  # seems to be always HTML
    "MID",  # message ID
    "Section",  # seems to be always the endstation
    "When",  # unclear how this differs from Date
]

# Keys that get dedicated handling during the conversion rather than a
# simple rename.
special = [
    "Author",
    "attachments",
    "P-Group",
    "message",
    "Title",
]

# Keys whose values are collected into the "tags" list of the output.
# Only the value is written: for the examples seen so far the key does not
# carry much useful information. Should a case turn up where the key matters,
# the tag could be written as "Key:Value" instead of just "Value" -- probably
# via a separate category (tags_with_keys) for those keys.
tags = [
    "Component",
    "Entry",
    "System",
    "Category",
    "Domain",
    "Effect",
    "Valid until",
    "Group",
    "Type",
]
# Convert every message file of the elog dump into one scilog snippet and
# write the collected snippets to the output file.

# Lookup table from the elog "Author" value to the scilog user name.
authors = json_load(fn_authors)

# Sorted to get a reproducible order of entries in the output.
fns = sorted(iglob(fn_dump + "/msg*.json"))

# All keys covered by one of the tables; built once instead of scanning the
# four tables for every key of every message.
known_keys = set(mapped) | set(ignored) | set(special) | set(tags)

res = []
for fn in fns:
    data_out = {}
    data_in = json_load(fn)

    # Report every key that none of the tables covers. Such keys are still in
    # data_in after all the pops below and trigger the ValueError at the end.
    for key, value in data_in.items():
        if key not in known_keys:
            print(fn, "has unassigned key:", key, "=", repr(value))

    # remove ignored entries; a message lacking one of them is fine, since
    # the value would have been discarded anyway
    for key in ignored:
        data_in.pop(key, None)

    # copy mapped entries under their new name, skipping empty values
    renamed = {new: data_in.pop(old) for old, new in mapped.items()}
    data_out.update({key: value for key, value in renamed.items() if value})

    # make tags: only the non-empty values are kept, the keys are not written
    tag_values = [data_in.pop(t, None) for t in tags]
    tag_values = [value for value in tag_values if value]
    if tag_values:
        data_out["tags"] = tag_values

    # treat special entries
    # authors without an entry in the mapping fall back to the default user
    author = authors.get(data_in.pop("Author"), default_user)
    data_out["createdBy"] = author

    pgroup = data_in.pop("P-Group", default_pgroup)
    data_out["ownerGroup"] = pgroup
    data_out["accessGroups"] = [author, pgroup]

    # the title becomes a heading in front of the message body
    mesg = data_in.pop("message")
    title = data_in.pop("Title").strip()
    textcontent = f"<p><h3>{title}</h3></p>" + mesg

    files = []
    attchs = data_in.pop("attachments")
    if attchs:
        # The loop variable must not be called `fn`: that name holds the
        # message file and is needed for the error message below.
        for attch in attchs:
            f_hash = 12345 #TODO
            f_id = 67890 #TODO
            # text after the last dot (the whole name if there is no dot)
            f_ext = attch.split(".")[-1]
            # each attachment is appended to the text as a link ...
            textcontent += f"<p><a class=\"fileLink\" target=\"_blank\" href=\"file:{f_hash}\">{attch}</a></p>"
            # ... and gets a matching record in the "files" list
            files.append({
                "fileHash": f_hash,
                "fileId": f_id,
                "fileExtension": f"file/{f_ext}"
            })

    data_out["textcontent"] = textcontent
    data_out["files"] = files

    # everything known has been popped by now; leftovers are the unassigned
    # keys reported above
    if data_in:
        raise ValueError(f"there are unassigned entries in \"{fn}\": {data_in}")

    data_out["snippetType"] = data_out["linkType"] = "paragraph"
    res.append(data_out)

json_save(res, fn_output)