added author collection/conversion tools

2021-03-27 18:53:21 +01:00
parent 0ccbaf2b26
commit 123362f4db
2 changed files with 147 additions and 0 deletions
--- a/collect_authors.py
+++ b/collect_authors.py
@@ -0,0 +1,74 @@
 #!/usr/bin/env python
 # Script setup: collects the authors found in an elog dump folder and writes
 # them to a text file (see the argument help texts below).
 # Text appended after each author name in the output unless -s/--separator is given.
 DEFAULT_SEP = " : "
 import argparse
 # NOTE: the command line is parsed at module level, so this also runs when the
 # file is imported, not only when it is executed as a script.
 parser = argparse.ArgumentParser(description="Collect authors from an elog dump ...")
 parser.add_argument("-d", "--dump", default="dump", help="Folder containing the elog dump")
 parser.add_argument("-o", "--output", default="authors", help="Output file name")
 parser.add_argument("-s", "--separator", default=DEFAULT_SEP, help=f"Key-value separator in the output (default: \"{DEFAULT_SEP}\")")
 # Flag only: when set, every collected author is printed in addition to the warnings.
 parser.add_argument("-p", "--print", action="store_true", help="Print authors")
 # Parsed arguments; read by the __main__ section at the bottom of the file.
 clargs = parser.parse_args()
 from pathlib import Path
 import json
 def collect(folder):
    """Return the sorted list of distinct authors found in an elog dump.

    Every ``msg*.json`` file directly inside *folder* is loaded (in sorted
    path order) and the value stored under its "Author" key is gathered.
    Duplicates are dropped before the result is sorted.
    """
    msg_files = sorted(Path(folder).glob("msg*.json"))
    unique = {json_load(path)["Author"] for path in msg_files}
    return sorted(unique)
 def check(authors, print_all=False):
    """Print warnings about suspicious author names.

    A warning is printed for every name that has leading or trailing
    whitespace and for the empty string. Nothing is modified or returned.

    Args:
        authors: Iterable of author name strings.
        print_all: If true, each author is also printed before its warnings.
    """
    for a in authors:
        if print_all:
            print(a)
        if a != a.strip():
            # Report the offending name itself; referencing any other
            # (undefined) name here would raise NameError instead of warning.
            print(f"Warning: Author \"{a}\" has strippable spaces.")
        if a == "":
            print("Warning: Author is the empty string.")
 def save(authors, output, sep):
    """Write one "<author><sep>" line per author to the file *output*.

    The line for each author is the author immediately followed by *sep*;
    the actual writing is delegated to ``text_save``.
    """
    lines = []
    for name in authors:
        lines.append(f"{name}{sep}")
    text_save(lines, output)
 def json_load(fname):
    """Read the file *fname* and return its parsed JSON content."""
    with open(fname, "r") as handle:
        content = handle.read()
    return json.loads(content)
 def text_save(data, fname):
    """Write every string in *data* to the file *fname*, one per line.

    The file is created or truncated; each entry is followed by a newline.
    """
    with open(fname, "w") as out:
        out.writelines(entry + "\n" for entry in data)
 # Script entry point: collect -> check -> save, driven by the parsed arguments.
 if __name__ == "__main__":
    # Sorted, de-duplicated authors from the msg*.json files in the dump folder.
    authors = collect(clargs.dump)
    # Prints warnings (and every author when -p/--print is given).
    check(authors, clargs.print)
    # Writes one "<author><separator>" line per author to the output file.
    save(authors, clargs.output, clargs.separator)
--- a/convert_authors.py
+++ b/convert_authors.py
@@ -0,0 +1,73 @@
 #!/usr/bin/env python
 # Script setup: converts a text file of "old<separator>new" author lines into
 # a JSON mapping (see the argument help texts below).
 # Text expected between the old and the new author name on each input line.
 DEFAULT_SEP = " : "
 import argparse
 # NOTE: the command line is parsed at module level, so this also runs when the
 # file is imported, not only when it is executed as a script.
 parser = argparse.ArgumentParser(description="Convert collected and mapped authors to json ...")
 parser.add_argument("-i", "--input", default="authors", help="Input file name")
 parser.add_argument("-o", "--output", default="authors.json", help="Output file name")
 # The separator is only used to split lines of the *input* file (the output
 # is JSON), so the help text says "input".
 parser.add_argument("-s", "--separator", default=DEFAULT_SEP, help=f"Key-value separator in the input (default: \"{DEFAULT_SEP}\")")
 # No explicit default: when -d is omitted the value is None, which ends up as
 # JSON null for every author without a mapping.
 parser.add_argument("-d", "--default", help="Default user")
 parser.add_argument("-p", "--print", action="store_true", help="Print authors")
 # Parsed arguments; read by the __main__ section at the bottom of the file.
 clargs = parser.parse_args()
 from pathlib import Path
 import json
 def author_load(fname, sep, default):
    """Load the old-author -> new-author mapping from a text file.

    Each non-empty, comment-stripped line (as returned by ``text_load``)
    must contain *sep* exactly once: ``<old><sep><new>``. When ``<new>`` is
    empty or whitespace-only, a warning is printed and *default* is used.

    Args:
        fname: Path of the text file to read.
        sep: Separator between the old and the new name.
        default: Replacement used for authors without a new name.

    Returns:
        dict mapping each old name to its new name (later lines win on
        duplicate old names).

    Raises:
        ValueError: If a line does not contain *sep* exactly once.
    """
    res = {}
    for line in text_load(fname):
        parts = line.split(sep)
        if len(parts) != 2:
            # Same exception type as a failed tuple unpacking, but the
            # message names the offending line.
            raise ValueError(
                f"Malformed line {line!r}: expected exactly one separator {sep!r}."
            )
        old, new = parts
        # The old name is the lookup key and is left untouched. The new name
        # is stripped so that whitespace left in front of a trailing
        # "# comment" neither ends up in the mapping nor hides an empty value.
        new = new.strip()
        if new == "":
            print(f"Warning: will use default ({default}) for author \"{old}\".")
            new = default
        res[old] = new
    return res
 def text_load(fname):
    """Return the non-empty lines of the file *fname* without comments.

    Everything from the first "#" on is dropped from each line, then a
    trailing newline is removed. Lines that are empty afterwards are
    skipped; other whitespace is kept as is.
    """
    kept = []
    with open(fname, "r") as handle:
        for raw in handle:
            content, _, _ = raw.partition("#")  # drop the comment part
            content = content.rstrip("\n")
            if content:
                kept.append(content)
    return kept
 def print_dict(d):
    """Print the mapping *d* as right-aligned "key -> value" lines.

    Keys are padded on the left to the width of the longest key (measured
    on ``str(key)``). An empty mapping prints nothing.
    """
    if not d:
        # Nothing to align or print; also avoids asking for the maximum of
        # an empty sequence.
        return
    length = maxstrlen(d.keys())
    for k, v in d.items():
        # str() keeps the padding consistent with how the width is measured.
        print(str(k).rjust(length), "->", v)
 def maxstrlen(seq):
    """Return the length of the longest ``str()`` form among the items of *seq*.

    Returns 0 for an empty *seq* instead of raising ValueError.
    """
    return max((strlen(i) for i in seq), default=0)
 def strlen(val):
    """Return the number of characters in the ``str()`` form of *val*."""
    text = str(val)
    return len(text)
 def json_dump(data, fname):
    """Write *data* as JSON to the file *fname*.

    The output uses sorted keys and an indentation of four spaces; the file
    is created or truncated.
    """
    options = {"sort_keys": True, "indent": 4}
    with open(fname, "w") as out:
        json.dump(data, out, **options)
 # Script entry point: load the mapping, optionally print it, write it as JSON.
 if __name__ == "__main__":
    # Mapping of old author name -> new author name read from the input file.
    author_map = author_load(clargs.input, clargs.separator, clargs.default)
    # Only shown when -p/--print was given.
    if clargs.print:
        print_dict(author_map)
    json_dump(author_map, clargs.output)