added author collection/conversion tools
This commit is contained in:
74
collect_authors.py
Executable file
74
collect_authors.py
Executable file
@ -0,0 +1,74 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
# Default text placed between an author name and its (initially empty) value
# in the output file.
DEFAULT_SEP = " : "


import argparse

# Command-line interface. Arguments are parsed at module level, so the
# resulting namespace `clargs` is available as a module global below.
parser = argparse.ArgumentParser(
    description="Collect authors from an elog dump ...",
)
parser.add_argument(
    "-d", "--dump",
    default="dump",
    help="Folder containing the elog dump",
)
parser.add_argument(
    "-o", "--output",
    default="authors",
    help="Output file name",
)
parser.add_argument(
    "-s", "--separator",
    default=DEFAULT_SEP,
    help=f"Key-value separator in the output (default: \"{DEFAULT_SEP}\")",
)
parser.add_argument(
    "-p", "--print",
    action="store_true",
    help="Print authors",
)

clargs = parser.parse_args()
|
||||
|
||||
|
||||
|
||||
from pathlib import Path
|
||||
import json
|
||||
|
||||
|
||||
def collect(folder):
    """Return the sorted, de-duplicated "Author" values found in *folder*.

    Every file in *folder* matching ``msg*.json`` is loaded (in sorted
    filename order) and its top-level ``"Author"`` entry is collected.

    Args:
        folder: Path (str or Path) of the directory holding the dump files.

    Returns:
        A sorted list of the distinct author values.

    Raises:
        KeyError: If a loaded file has no ``"Author"`` entry.
    """
    message_files = sorted(Path(folder).glob("msg*.json"))
    unique_authors = {json_load(path)["Author"] for path in message_files}
    return sorted(unique_authors)
|
||||
|
||||
|
||||
def check(authors, print_all=False):
    """Print warnings about suspicious author names.

    Two conditions are reported on stdout:
    * the name has leading/trailing whitespace that ``str.strip`` removes;
    * the name is the empty string.

    Args:
        authors: Iterable of author name strings.
        print_all: If true, also print every author name before its warnings.
    """
    for author in authors:
        if print_all:
            print(author)

        if author != author.strip():
            # The name is quoted so the surrounding whitespace is visible.
            print(f"Warning: Author \"{author}\" has strippable spaces.")

        if author == "":
            print("Warning: Author is the empty string.")
|
||||
|
||||
|
||||
def save(authors, output, sep):
    """Write one line per author, each consisting of the name followed by *sep*.

    Args:
        authors: Iterable of author names.
        output: Name of the file to write.
        sep: Text appended after every author name.
    """
    lines = []
    for name in authors:
        lines.append("{}{}".format(name, sep))
    text_save(lines, output)
|
||||
|
||||
|
||||
def json_load(fname):
    """Read the file *fname* and return its parsed JSON content."""
    with open(fname, "r") as handle:
        parsed = json.load(handle)
    return parsed
|
||||
|
||||
def text_save(data, fname):
    """Write every string in *data* to the file *fname*, one per line.

    The file is opened for writing and truncated; each entry is followed
    by a newline character.
    """
    with open(fname, "w") as handle:
        handle.writelines(entry + "\n" for entry in data)
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Gather the author names, report suspicious ones, then write them out.
    authors = collect(clargs.dump)
    check(authors, print_all=clargs.print)
    save(authors, output=clargs.output, sep=clargs.separator)
|
||||
|
||||
|
||||
|
73
convert_authors.py
Executable file
73
convert_authors.py
Executable file
@ -0,0 +1,73 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
# Default text expected between an old author name and its replacement
# on each line of the input file.
DEFAULT_SEP = " : "


import argparse

# Command-line interface. Arguments are parsed at module level, so the
# resulting namespace `clargs` is available as a module global below.
parser = argparse.ArgumentParser(
    description="Convert collected and mapped authors to json ...",
)
parser.add_argument(
    "-i", "--input",
    default="authors",
    help="Input file name",
)
parser.add_argument(
    "-o", "--output",
    default="authors.json",
    help="Output file name",
)
# The separator is used to split the lines of the *input* file (see
# author_load), so the help text refers to the input, not the output.
parser.add_argument(
    "-s", "--separator",
    default=DEFAULT_SEP,
    help=f"Key-value separator in the input (default: \"{DEFAULT_SEP}\")",
)
parser.add_argument(
    "-d", "--default",
    help="Default user",
)
parser.add_argument(
    "-p", "--print",
    action="store_true",
    help="Print authors",
)

clargs = parser.parse_args()
|
||||
|
||||
|
||||
|
||||
from pathlib import Path
|
||||
import json
|
||||
|
||||
|
||||
def author_load(fname, sep, default):
    """Load the old-author -> new-author mapping from a text file.

    Each line returned by ``text_load`` must contain *sep* exactly once,
    splitting it into the old author name and its replacement. When the
    replacement is empty, a warning is printed and *default* is used
    instead.

    Args:
        fname: Name of the mapping file.
        sep: Separator between the old and the new author name.
        default: Replacement for authors without a new name (may be None).

    Returns:
        A dict mapping every old author name to its new name (or *default*).

    Raises:
        ValueError: If a line does not split into exactly two fields.
    """
    res = {}
    for line in text_load(fname):
        fields = line.split(sep)
        if len(fields) != 2:
            # Name the offending line instead of failing with an opaque
            # tuple-unpacking error.
            raise ValueError(
                f"Malformed line {line!r}: expected exactly one separator "
                f"{sep!r}, found {len(fields) - 1}"
            )
        old, new = fields
        if new == "":
            print(f"Warning: will use default ({default}) for author \"{old}\".")
            new = default
        res[old] = new
    return res
|
||||
|
||||
def text_load(fname):
    """Return the meaningful lines of the text file *fname*.

    For every line, everything from the first ``#`` onwards is discarded
    and trailing newline characters are removed; lines that end up empty
    are skipped. Other whitespace is kept as is.
    """
    with open(fname, "r") as handle:
        cleaned = (raw.partition("#")[0].rstrip("\n") for raw in handle)
        # Consume the generator while the file is still open.
        return [text for text in cleaned if text]
|
||||
|
||||
def print_dict(d):
    """Print every ``key -> value`` pair of *d*, right-aligning the keys.

    Keys are padded on the left to the width reported by ``maxstrlen``
    for the dict's keys.
    """
    width = maxstrlen(d.keys())
    for key, value in d.items():
        padded_key = key.rjust(width)
        print(padded_key, "->", value)
|
||||
|
||||
def maxstrlen(seq):
    """Return the largest ``strlen`` of the items in *seq*, or 0 if it is empty."""
    # default=0 avoids the ValueError that max() raises for an empty sequence.
    return max((strlen(i) for i in seq), default=0)
|
||||
|
||||
def strlen(val):
    """Return the length of the ``str()`` representation of *val*."""
    text = str(val)
    return len(text)
|
||||
|
||||
def json_dump(data, fname):
    """Serialize *data* as JSON into the file *fname*.

    The output has its keys sorted and is indented by four spaces.
    """
    with open(fname, "w") as handle:
        json.dump(data, handle, indent=4, sort_keys=True)
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Parse the mapping file, optionally show it, then store it as JSON.
    author_map = author_load(
        clargs.input,
        sep=clargs.separator,
        default=clargs.default,
    )
    if clargs.print:
        print_dict(author_map)
    json_dump(author_map, fname=clargs.output)
|
||||
|
||||
|
||||
|
Reference in New Issue
Block a user