From 123362f4dbfef5188d733d147e81c7c1ec73f2c7 Mon Sep 17 00:00:00 2001
From: Sven Augustin
Date: Sat, 27 Mar 2021 18:53:21 +0100
Subject: [PATCH] added author collection/conversion tools

---
 collect_authors.py | 74 ++++++++++++++++++++++++++++++++++++
 convert_authors.py | 93 ++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 167 insertions(+)
 create mode 100755 collect_authors.py
 create mode 100755 convert_authors.py

diff --git a/collect_authors.py b/collect_authors.py
new file mode 100755
index 0000000..9363cab
--- /dev/null
+++ b/collect_authors.py
@@ -0,0 +1,74 @@
+#!/usr/bin/env python
+"""Collect the distinct authors of an elog dump.
+
+Reads the "Author" field of every msg*.json file in the dump folder and
+writes one line per distinct author (the author followed by the separator)
+to the output file.  convert_authors.py reads such a file back and expects
+the new name of each author after the separator.
+"""
+
+import argparse
+import json
+from pathlib import Path
+
+DEFAULT_SEP = " : "
+
+
+parser = argparse.ArgumentParser(description="Collect authors from an elog dump ...")
+
+parser.add_argument("-d", "--dump", default="dump", help="Folder containing the elog dump")
+parser.add_argument("-o", "--output", default="authors", help="Output file name")
+parser.add_argument("-s", "--separator", default=DEFAULT_SEP, help=f"Key-value separator in the output (default: \"{DEFAULT_SEP}\")")
+parser.add_argument("-p", "--print", action="store_true", help="Print authors")
+
+
+def collect(folder):
+    """Return the sorted distinct "Author" values of the msg*.json files in folder."""
+    authors = set()
+    for fn in sorted(Path(folder).glob("msg*.json")):
+        data = json_load(fn)
+        authors.add(data["Author"])
+    return sorted(authors)
+
+
+def check(authors, print_all=False):
+    """Warn about authors that are empty or have leading/trailing whitespace.
+
+    With print_all set, every author is printed before it is checked.
+    """
+    for a in authors:
+        if print_all:
+            print(a)
+
+        if a != a.strip():
+            print(f"Warning: Author \"{a}\" has strippable spaces.")
+
+        if a == "":
+            print("Warning: Author is the empty string.")
+
+
+def save(authors, output, sep):
+    """Write one line per author, the author followed by sep, to the file output."""
+    data = [f"{a}{sep}" for a in authors]
+    text_save(data, output)
+
+
+def json_load(fname):
+    """Return the parsed content of the JSON file fname."""
+    with open(fname, "r") as f:
+        return json.load(f)
+
+
+def text_save(data, fname):
+    """Write the strings in data to the file fname, one per line."""
+    with open(fname, "w") as f:
+        for line in data:
+            f.write(line)
+            f.write("\n")
+
+
+if __name__ == "__main__":
+    clargs = parser.parse_args()
+    authors = collect(clargs.dump)
+    check(authors, clargs.print)
+    save(authors, clargs.output, clargs.separator)
diff --git a/convert_authors.py b/convert_authors.py
new file mode 100755
index 0000000..cd7d196
--- /dev/null
+++ b/convert_authors.py
@@ -0,0 +1,93 @@
+#!/usr/bin/env python
+"""Convert a file of collected and mapped authors to a JSON mapping.
+
+Each line of the input file holds an old author, the separator and the new
+name.  The output is a JSON object that maps every old author to its new name.
+"""
+
+import argparse
+import json
+from pathlib import Path
+
+DEFAULT_SEP = " : "
+
+
+parser = argparse.ArgumentParser(description="Convert collected and mapped authors to json ...")
+
+parser.add_argument("-i", "--input", default="authors", help="Input file name")
+parser.add_argument("-o", "--output", default="authors.json", help="Output file name")
+parser.add_argument("-s", "--separator", default=DEFAULT_SEP, help=f"Key-value separator in the input (default: \"{DEFAULT_SEP}\")")
+parser.add_argument("-d", "--default", help="Default user")
+parser.add_argument("-p", "--print", action="store_true", help="Print authors")
+
+
+def author_load(fname, sep, default):
+    """Return a dict that maps the old authors in the file fname to their new names.
+
+    Every line returned by text_load() must contain sep exactly once: the text
+    in front of it is the old author (kept verbatim), the text after it is the
+    new name.  An empty new name is replaced by default, with a warning.
+
+    Raises ValueError for a line that does not contain sep exactly once.
+    """
+    res = {}
+    for line in text_load(fname):
+        parts = line.split(sep)
+        if len(parts) != 2:
+            raise ValueError(f"Expected exactly one separator \"{sep}\" in line \"{line}\".")
+        old, new = parts
+        # A comment after the new name leaves whitespace behind that is not part of the name.
+        new = new.strip()
+        if new == "":
+            print(f"Warning: will use default ({default}) for author \"{old}\".")
+            new = default
+        res[old] = new
+    return res
+
+
+def text_load(fname):
+    """Return the lines of the file fname without comments, line ends and empty lines.
+
+    Everything from the first "#" of a line on is treated as a comment, so an
+    author that contains "#" cannot be represented in the file.
+    """
+    res = []
+    with open(fname, "r") as f:
+        for line in f:
+            line = line.split("#")[0] # remove comments
+            line = line.rstrip("\n")
+            if not line:
+                continue
+            res.append(line)
+    return res
+
+
+def print_dict(d):
+    """Print the items of d as "key -> value" lines with right-aligned keys."""
+    length = maxstrlen(d.keys())
+    for k, v in d.items():
+        print(k.rjust(length), "->", v)
+
+
+def maxstrlen(seq):
+    """Return the length of the longest str() of the items in seq (0 if seq is empty)."""
+    return max((strlen(i) for i in seq), default=0)
+
+
+def strlen(val):
+    """Return the length of str(val)."""
+    return len(str(val))
+
+
+def json_dump(data, fname):
+    """Write data as indented JSON with sorted keys to the file fname."""
+    with open(fname, "w") as f:
+        json.dump(data, f, sort_keys=True, indent=4)
+
+
+if __name__ == "__main__":
+    clargs = parser.parse_args()
+    author_map = author_load(clargs.input, clargs.separator, clargs.default)
+    if clargs.print:
+        print_dict(author_map)
+    json_dump(author_map, clargs.output)