added author collection/conversion tools

This commit is contained in:
2021-03-27 18:53:21 +01:00
parent 0ccbaf2b26
commit 123362f4db
2 changed files with 147 additions and 0 deletions

74
collect_authors.py Executable file
View File

@ -0,0 +1,74 @@
#!/usr/bin/env python
# Default separator appended after each author name in the output file.
DEFAULT_SEP = " : "
import argparse
# Command-line interface. The arguments are parsed at module level, so the
# resulting namespace `clargs` is a module global used by the entry point.
parser = argparse.ArgumentParser(description="Collect authors from an elog dump ...")
# Folder that is searched for the dumped message files.
parser.add_argument("-d", "--dump", default="dump", help="Folder containing the elog dump")
# File that receives one "<author><separator>" line per collected author.
parser.add_argument("-o", "--output", default="authors", help="Output file name")
parser.add_argument("-s", "--separator", default=DEFAULT_SEP, help=f"Key-value separator in the output (default: \"{DEFAULT_SEP}\")")
# When set, every collected author is echoed to stdout while being checked.
parser.add_argument("-p", "--print", action="store_true", help="Print authors")
clargs = parser.parse_args()
from pathlib import Path
import json
def collect(folder):
    """Return the sorted, de-duplicated authors found in an elog dump.

    Every file matching ``msg*.json`` directly inside *folder* is loaded with
    ``json_load`` and its ``"Author"`` entry is collected. A file without an
    ``"Author"`` key raises ``KeyError``.
    """
    message_files = sorted(Path(folder).glob("msg*.json"))
    unique_authors = {json_load(path)["Author"] for path in message_files}
    return sorted(unique_authors)
def check(authors, print_all=False):
    """Print warnings about suspicious author names.

    Args:
        authors: Iterable of author name strings.
        print_all: If true, print every author before its warnings.

    A warning is written to stdout for each name that has leading or
    trailing whitespace and for each name that is the empty string.
    Nothing is modified and nothing is returned.
    """
    for author in authors:
        if print_all:
            print(author)
        # The message must use the loop variable; referring to any other
        # name here raises NameError as soon as the first warning fires.
        if author != author.strip():
            print(f"Warning: Author \"{author}\" has strippable spaces.")
        if author == "":
            print("Warning: Author is the empty string.")
def save(authors, output, sep):
    """Write one ``<author><sep>`` line per author to *output*.

    The lines are built first and then handed to ``text_save``, which
    performs the actual file write.
    """
    lines = []
    for name in authors:
        lines.append("{}{}".format(name, sep))
    text_save(lines, output)
def json_load(fname):
    """Load and return the JSON document stored in *fname*.

    Args:
        fname: Path (``str`` or ``pathlib.Path``) of the JSON file.

    Returns:
        The decoded JSON value.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the content is not valid JSON.
    """
    # JSON text is UTF-8; relying on the locale's default encoding breaks
    # non-ASCII author names on systems whose locale is not UTF-8.
    with open(fname, "r", encoding="utf-8") as f:
        return json.load(f)
def text_save(data, fname):
    """Write the strings in *data* to *fname*, each followed by a newline.

    An existing file is overwritten; an empty *data* produces an empty file.
    """
    with open(fname, "w") as out:
        for entry in data:
            out.write(entry + "\n")
if __name__ == "__main__":
    # Script entry point: gather the authors from the dump, report
    # suspicious names, then write the author list to the output file.
    authors = collect(clargs.dump)
    check(authors, print_all=clargs.print)
    save(authors, output=clargs.output, sep=clargs.separator)

73
convert_authors.py Executable file
View File

@ -0,0 +1,73 @@
#!/usr/bin/env python
# Default separator expected between the old author and its mapped user.
DEFAULT_SEP = " : "
import argparse
# Command-line interface. The arguments are parsed at module level, so the
# resulting namespace `clargs` is a module global used by the entry point.
parser = argparse.ArgumentParser(description="Convert collected and mapped authors to json ...")
parser.add_argument("-i", "--input", default="authors", help="Input file name")
parser.add_argument("-o", "--output", default="authors.json", help="Output file name")
# The separator is used to split the lines of the *input* file (it is passed
# to author_load); the JSON output does not contain it.
parser.add_argument("-s", "--separator", default=DEFAULT_SEP, help=f"Key-value separator in the input (default: \"{DEFAULT_SEP}\")")
# Value substituted for authors whose mapping was left empty in the input.
parser.add_argument("-d", "--default", help="Default user")
parser.add_argument("-p", "--print", action="store_true", help="Print authors")
clargs = parser.parse_args()
from pathlib import Path
import json
def author_load(fname, sep, default):
    """Load an ``old author -> new user`` mapping from a text file.

    Each line returned by ``text_load(fname)`` must have the form
    ``<old><sep><new>``. An empty ``<new>`` is replaced by *default* and a
    warning is printed.

    Args:
        fname: Path of the mapping file.
        sep: Separator between the old author and the new user.
        default: Value used for authors whose new user is empty.

    Returns:
        dict mapping each old author to its new user (or *default*). If an
        old author occurs more than once, the last line wins.

    Raises:
        ValueError: If a line does not contain *sep* exactly once.
    """
    res = {}
    for line in text_load(fname):
        parts = line.split(sep)
        if len(parts) != 2:
            # Name the offending line instead of failing with a bare
            # tuple-unpacking error that gives no hint where the problem is.
            raise ValueError(
                f"Malformed line in \"{fname}\": expected exactly one "
                f"separator {sep!r}, got {line!r}"
            )
        old, new = parts
        # `old` stays verbatim because it is the lookup key. Whitespace
        # around `new` is never meaningful; it is typically what is left
        # behind once a trailing "# comment" has been removed from the line.
        new = new.strip()
        if new == "":
            print(f"Warning: will use default ({default}) for author \"{old}\".")
            new = default
        res[old] = new
    return res
def text_load(fname):
    """Return the non-empty lines of *fname* with comments removed.

    Everything from the first ``#`` on a line is dropped, then trailing
    newline characters are removed. Lines that are empty afterwards are
    skipped; other whitespace is kept as is.
    """
    kept = []
    with open(fname, "r") as src:
        for raw in src:
            content, _, _ = raw.partition("#")  # drop the comment part
            content = content.rstrip("\n")
            if content:
                kept.append(content)
    return kept
def print_dict(d):
    """Print every ``key -> value`` pair of *d* with the keys right-aligned.

    Keys are padded on the left to the width of the longest key so the
    arrows line up. An empty mapping prints nothing.
    """
    if not d:
        # Nothing to print; this also avoids computing the maximum key
        # width of an empty key set, which would raise ValueError.
        return
    length = maxstrlen(d.keys())
    for k, v in d.items():
        print(k.rjust(length), "->", v)
def maxstrlen(seq):
    """Return the largest ``strlen`` of the items in *seq*.

    Returns 0 for an empty *seq* instead of raising ``ValueError``.
    """
    return max((strlen(i) for i in seq), default=0)
def strlen(val):
    """Return the number of characters in the string form of *val*."""
    text = str(val)
    return len(text)
def json_dump(data, fname):
    """Serialize *data* as JSON into *fname*.

    The output uses a 4-space indent and sorted keys; an existing file is
    overwritten.
    """
    with open(fname, "w") as handle:
        json.dump(data, handle, indent=4, sort_keys=True)
if __name__ == "__main__":
    # Script entry point: read the author mapping, optionally show it,
    # then write it out as JSON.
    author_map = author_load(clargs.input, sep=clargs.separator, default=clargs.default)
    if clargs.print:
        print_dict(author_map)
    json_dump(author_map, fname=clargs.output)