From 1f3aa6676e91a61cd13511b63bd3b08c6f894c6c Mon Sep 17 00:00:00 2001 From: Douglas Clowes Date: Thu, 5 Jun 2014 11:36:29 +1000 Subject: [PATCH] Create the gitloader and gitrefer progs and tie in with compareSICS --- site_ansto/instrument/compareSICS.py | 52 +++++++- site_ansto/instrument/util/gitloader.py | 66 ++++++++++ site_ansto/instrument/util/gitrefer.py | 159 ++++++++++++++++++++++++ 3 files changed, 273 insertions(+), 4 deletions(-) create mode 100644 site_ansto/instrument/util/gitloader.py create mode 100644 site_ansto/instrument/util/gitrefer.py diff --git a/site_ansto/instrument/compareSICS.py b/site_ansto/instrument/compareSICS.py index 9d0bff4c..b2722cdf 100755 --- a/site_ansto/instrument/compareSICS.py +++ b/site_ansto/instrument/compareSICS.py @@ -36,6 +36,7 @@ import sys import argparse import difflib import shutil +import datetime def load_manifest(theManifest): ''' @@ -131,7 +132,12 @@ if __name__ == "__main__": parser.add_argument("-v", "--verbose", dest="verbose", action="store_true", help="emit tables for debugging") parser.add_argument("--link", dest="link", action="store_true", help="link deployed files to /tmp tree") parser.add_argument("--copy", dest="copy", action="store_true", help="copy deployed files to /tmp tree") + parser.add_argument("--show", dest="show", action="store_true", help="show the differences") + parser.add_argument("--swap", dest="swap", action="store_true", help="swap the command arguments") parser.add_argument("-m", "--manifest", dest="manifest", help="specify another manifest directory") + parser.add_argument("--command", dest="command", help="specify a command for diffing") + parser.add_argument("--gitrefer", action="store_true", help="output a gitrefer commandstring") + parser.add_argument("--gitreferall", action="store_true", help="output a gitrefer commandstring for all files") parser.add_argument("path", nargs="?", default = default_dir, help="directory containing FILEMAP.TXT") args = parser.parse_args() if 
args.verbose: @@ -189,6 +195,9 @@ if __name__ == "__main__": print os.path.join(default_dir, os.path.join(entry, key)) print "Changed Files:" + delta_list = [] + sigma_list = [] + print_list = {} for key in sorted(gmap): if key.endswith((".swp", ".swo", ".hdf", "~", ".bck", ".pyc")): continue @@ -201,6 +210,8 @@ if __name__ == "__main__": source = os.path.join(amap["SRCDIR"], fmap[target]) source = os.path.join(os.getenv("PWD"), fmap[target]) destin = os.path.join(default_dir, target) + if os.path.exists(destin): + sigma_list.append(destin) if not (os.path.exists(source) and os.path.exists(destin)): print " Compare:", source, destin if not os.path.exists(source): @@ -208,12 +219,45 @@ if __name__ == "__main__": if not os.path.exists(destin): print " ", destin, "does not exist" continue - delta = list(difflib.unified_diff(open(source).readlines(), open(destin).readlines())) + delta = list(difflib.unified_diff(\ + open(source).read().splitlines(),\ + open(destin).read().splitlines(),\ + fromfile=source,\ + tofile=destin,\ + fromfiledate=datetime.datetime.fromtimestamp(os.path.getmtime(source)),\ + tofiledate=datetime.datetime.fromtimestamp(os.path.getmtime(destin)),\ + lineterm="")) delta_len = len(delta) if delta_len > 0: - if verbose: - print delta - print source, destin, "#%d" % delta_len + print_list[destin] = (source, list(delta)) + + for destin in sorted(print_list.keys()): + delta_list.append(destin) + source, delta = print_list[destin] + if args.command: + if args.swap: + print args.command, destin, source + else: + print args.command, source, destin + else: + print source, destin, "#%d" % len(delta) + if args.show: + for line in delta: + print line + + print "Changed file count:", len(delta_list) + + if args.gitreferall: + print "gitrefer", + for filename in sigma_list: + print filename, + print + + if args.gitrefer: + print "gitrefer", + for filename in delta_list: + print filename, + print if args.link: print "Linking Files:" diff --git 
#!/usr/bin/python
# vim: tabstop=8 softtabstop=4 shiftwidth=4 nocindent smartindent
# File: site_ansto/instrument/util/gitloader.py (new file added by this patch)
"""Load the history of a git branch into the gitxref.sqlite database.

Two tables are written in the current directory's gitxref.sqlite:

    commit_tab (commit_hash, timestamp, message)  one row per commit
    blob_tab   (commit_hash, blob_hash, blob_name) one row per tree entry
                                                   of every commit

The script must be run from inside a git working tree.
"""
import os
import sys
import sqlite3
import subprocess
import shlex
import traceback

# Name of the database file, created in the current working directory.
DATABASE = "gitxref.sqlite"

# Parsed command line; only set when the module is run as a script.
args = None
# Database connection and cursor shared by the functions below.
conn = None
curs = None


def setup_database():
    """Create a fresh, empty database, deleting any existing one.

    The connection and cursor are stored in the globals conn and curs.
    """
    global conn, curs
    if os.path.exists(DATABASE):
        os.remove(DATABASE)
    conn = sqlite3.connect(DATABASE)
    curs = conn.cursor()
    curs.execute("CREATE TABLE commit_tab (commit_hash TEXT, timestamp INTEGER, message TEXT)")
    curs.execute("CREATE TABLE blob_tab (commit_hash TEXT, blob_hash TEXT, blob_name TEXT)")


def run_git(argv):
    """Run "git <argv...>" and return its standard output as a list of lines.

    argv is passed to git as an argument list, so no shell or shlex quoting
    is involved.  A non-zero exit status and anything git wrote to stderr are
    reported on our stderr instead of being discarded; the (possibly empty)
    stdout lines are still returned.
    """
    proc = subprocess.Popen(["git"] + list(argv),
                            stdout=subprocess.PIPE,
                            stderr=subprocess.PIPE,
                            universal_newlines=True)  # text lines on Python 2 and 3
    out, err = proc.communicate()
    if proc.returncode != 0:
        sys.stderr.write("git %s failed (exit %d)\n" % (" ".join(argv), proc.returncode))
    for line in err.splitlines():
        sys.stderr.write(" Err: %s\n" % line)
    return out.splitlines()


def parse_log_line(log_line):
    """Split one "%T %H %at %s" log line.

    Returns the tuple (tree_hash, commit_hash, timestamp, message); all four
    are strings.  The message keeps its embedded spaces.  Raises ValueError
    (from the caller's unpacking) if the line has fewer than four fields.
    """
    return tuple(log_line.split(" ", 3))


def parse_ls_tree_line(line):
    """Split one "git ls-tree -r" line into (blob_hash, blob_name).

    git separates the "<mode> <type> <hash>" prefix from the path with a tab,
    so splitting there keeps leading/embedded spaces of the path intact.
    Lines without a tab fall back to plain whitespace splitting.
    """
    if "\t" in line:
        meta, fname = line.split("\t", 1)
        blob_hash = meta.split()[2]
    else:
        _mode, _kind, blob_hash, fname = line.split(None, 3)
    return blob_hash, fname


def populate_data(branch=None):
    """Fill commit_tab and blob_tab from the history of a branch.

    branch defaults to the --branch command line option when the module runs
    as a script, otherwise to "HEAD".  setup_database() must have been called
    first.  One "git ls-tree -r" is run per commit.
    """
    global conn, curs
    if branch is None:
        branch = args.branch if args is not None else "HEAD"
    for log_line in run_git(["log", "--pretty=format:%T %H %at %s", branch]):
        tree_hash, commit_hash, timestamp, message = parse_log_line(log_line)
        # The message is stored as its repr(), exactly as before.
        # NOTE(review): presumably this keeps non-ASCII byte strings
        # acceptable to sqlite3 under Python 2 - confirm before changing.
        curs.execute("insert into commit_tab values (?, ?, ?)",
                     (commit_hash, timestamp, repr(message)))
        rows = [(commit_hash,) + parse_ls_tree_line(line)
                for line in run_git(["ls-tree", "-r", tree_hash])]
        curs.executemany("insert into blob_tab values (?, ?, ?)", rows)
    # Commit explicitly rather than relying on the implicit commit that older
    # sqlite3 modules perform before the CREATE INDEX statements.
    conn.commit()


def create_indexes():
    """Create the lookup indexes on the commit and blob hash columns."""
    global conn, curs
    curs.execute("CREATE INDEX commit_commit on commit_tab (commit_hash)")
    curs.execute("CREATE INDEX commit_blob on blob_tab (commit_hash)")
    curs.execute("CREATE INDEX blob_blob on blob_tab (blob_hash)")


def main_program():
    """Rebuild the database from scratch: create, populate, index."""
    setup_database()
    try:
        populate_data()
        create_indexes()
        conn.commit()
    finally:
        conn.close()


if __name__ == "__main__":
    import argparse
    import cProfile
    parser = argparse.ArgumentParser()
    parser.add_argument("-b", "--branch", default="HEAD", help="load for branch [HEAD]")
    parser.add_argument("-d", "--debug", action="store_true", help="debugging output")
    parser.add_argument("-p", "--profile", action="store_true", help="profile output")
    args = parser.parse_args()
    if args.profile:
        cProfile.run('main_program()')
    else:
        main_program()
#!/usr/bin/python
# vim: tabstop=8 softtabstop=4 shiftwidth=4 nocindent smartindent
# File: site_ansto/instrument/util/gitrefer.py (new file added by this patch)
"""Find the commits in gitxref.sqlite that contain given file contents.

Each command line argument is either a file (its git blob hash is computed
with "git hash-object") or a blob hash given directly in hex.  The hashes are
looked up in the gitxref.sqlite database in the current directory and the
summaries selected with -s/--summary are printed.
"""
import os
import sys
import sqlite3
import datetime
import subprocess
import shlex
import argparse
import string

# Name of the database file, read from the current working directory.
DATABASE = "gitxref.sqlite"

# Module state, normally set by main_program() from the command line.
# Defaults are provided so the functions are usable when imported.
debug = False
Verbose = False
Summary = set()      # numbers of the summaries to print
hash_map = {}        # blob hash -> filename it was computed from
args = None
conn = None
curs = None


def get_hash_from_file(filename):
    """Return the git blob hash of a file, as reported by git hash-object.

    The filename is passed to git as a single argument (after "--"), so
    spaces, quotes and a leading "-" are handled.  Raises ValueError when git
    produces no hash, e.g. because the file does not exist.
    """
    global debug
    proc = subprocess.Popen(["git", "hash-object", "--", filename],
                            stdout=subprocess.PIPE,
                            stderr=subprocess.PIPE,
                            universal_newlines=True)  # text lines on Python 2 and 3
    hashtxt, err = proc.communicate()
    hashtxt = hashtxt.splitlines()
    err = err.splitlines()
    if debug:
        for line in hashtxt:
            print(" Txt: %s" % line)
        for line in err:
            print(" Err: %s" % line)
    if not hashtxt:
        raise ValueError("git hash-object gave no hash for %r: %s"
                         % (filename, " ".join(err)))
    obj_hash = hashtxt[0].strip()
    if Verbose:
        print("Hash: %s %s" % (obj_hash, filename))
    return obj_hash


def parse_summary(spec):
    """Parse a summary selection such as "2", "1,3" or "1-3,5".

    Returns the set of selected integers.  Parts that are not a number or a
    lo-hi range of numbers are ignored.
    """
    selected = set()
    for rng in (part.strip() for part in spec.split(',')):
        if '-' in rng:
            lo, _, hi = rng.partition('-')
            if lo.isdigit() and hi.isdigit():
                # update(), not add(): a range is a list of members, and a
                # list cannot itself be a set element.
                selected.update(range(int(lo), int(hi) + 1))
        elif rng.isdigit():
            selected.add(int(rng))
    return selected


def _shorten(message, limit):
    """Truncate message to limit characters, ending in "..." when cut."""
    if len(message) > limit:
        return message[:limit - 3] + "..."
    return message


def _common_commits(blob_map, commit_map):
    """Return the set of commits that best covers the requested blobs.

    This is the intersection of the commit sets of all blobs.  When no single
    commit holds every blob, fall back to the commits holding the largest
    number of blobs that all include the blob set of the first such commit.
    """
    commit_union = set(commit_map.keys())
    for blob_key in blob_map:
        commit_union.intersection_update(blob_map[blob_key][1])
    if len(commit_union) == 0:
        print("Recalculating commit_map")
        by_blob_count = sorted(commit_map.keys(),
                               key=lambda x: len(commit_map[x][2]),
                               reverse=True)
        for commit_key in by_blob_count:
            blobs = commit_map[commit_key][2]
            if len(commit_union) == 0:
                # The commit with the most blobs defines the reference set.
                blob_union = set(blobs)
            if len(blob_union.intersection(blobs)) < len(blob_union):
                break
            commit_union.add(commit_key)
    return commit_union


def do_one_way(the_hashes):
    """Look up blob hashes in the database and print the selected summaries.

    the_hashes is a list of blob hashes.  Reads the globals Verbose, Summary
    and hash_map.  The database connection is closed once the rows are read.
    """
    global conn
    global curs
    conn = sqlite3.connect(DATABASE)
    curs = conn.cursor()
    blob_map = {}     # blob_hash -> (blob_name, set of commit hashes)
    commit_map = {}   # commit_hash -> (timestamp, message, set of blob hashes)

    select = ("select blob_hash, blob_name, c.commit_hash, timestamp, message "
              "from blob_tab b, commit_tab c "
              "where b.blob_hash == ? and b.commit_hash == c.commit_hash "
              "order by timestamp")
    try:
        for one_hash in the_hashes:
            curs.execute(select, (one_hash,))
            old_hash = None
            for blob_hash, blob_name, commit_hash, timestamp, message in curs.fetchall():
                if blob_hash != old_hash:
                    if Verbose:
                        print("File: %s %s" % (blob_hash, blob_name))
                    old_hash = blob_hash
                if blob_hash not in blob_map:
                    blob_map[blob_hash] = (blob_name, set())
                blob_map[blob_hash][1].add(commit_hash)
                if commit_hash not in commit_map:
                    commit_map[commit_hash] = (timestamp, message, set())
                commit_map[commit_hash][2].add(blob_hash)
                if Verbose:
                    print(" Commit: %s %s %s"
                          % (commit_hash,
                             datetime.datetime.fromtimestamp(timestamp),
                             _shorten(message, 60)))
    finally:
        conn.close()

    commit_union = _common_commits(blob_map, commit_map)

    if 1 in Summary:
        print("Summary1: Files with Commits")
        for blob_key in blob_map:
            blob_name, commit_set = blob_map[blob_key]
            print("%s: %s %d" % (blob_key, blob_name, len(commit_set)))
    if 2 in Summary:
        print("Summary2: Commits with most files")
        for commit_key in sorted(commit_union, key=lambda x: commit_map[x][0]):
            timestamp, message, blobs = commit_map[commit_key]
            print(" Commit: %s %s %d %s"
                  % (commit_key, datetime.datetime.fromtimestamp(timestamp),
                     len(blobs), _shorten(message, 50)))
    if 3 in Summary:
        print("Summary3: Commits with files")
        for commit_key in sorted(commit_map.keys(), key=lambda x: commit_map[x][0]):
            timestamp, message, blobs = commit_map[commit_key]
            print(" Commit: %s %s %d %s"
                  % (commit_key, datetime.datetime.fromtimestamp(timestamp),
                     len(blobs), _shorten(message, 60)))
    if 4 in Summary:
        print("Summary4: Files not in Summary 2")
        for blob_key in blob_map:
            blob_name, commit_set = blob_map[blob_key]
            if len(commit_union.intersection(commit_set)) == 0:
                print("\n%s: %s %d %s"
                      % (blob_key, blob_name, len(commit_set), commit_set))
    if 5 in Summary:
        print("Summary5: Files without Commits")
        for hash_key in [x for x in hash_map.keys() if x not in blob_map]:
            print("%s %s" % (hash_key, hash_map[hash_key]))


def do_args():
    """Turn the command line arguments into blob hashes and report on them.

    An argument containing "." is taken to be a file and hashed; an argument
    made only of hex digits is used as a hash directly.  Any other argument
    is hashed if it names an existing file, otherwise it is reported on
    stderr and ignored.
    """
    global hash_map, args
    the_hashes = []
    hash_map = {}
    for filename in args.filenames:
        is_hex = all(a in string.hexdigits for a in filename)
        if '.' in filename or (not is_hex and os.path.isfile(filename)):
            try:
                one_hash = get_hash_from_file(filename)
            except ValueError as exc:
                sys.stderr.write("Ignored: %s\n" % exc)
                continue
            hash_map[one_hash] = filename
            the_hashes.append(one_hash)
        elif is_hex:
            the_hashes.append(filename)
        else:
            sys.stderr.write("Ignored (not a file or a hex hash): %s\n" % filename)
    do_one_way(the_hashes)


def main_program():
    """Parse the command line, set the module globals and run do_args()."""
    global debug, Verbose, Summary
    global hash_map, args
    parser = argparse.ArgumentParser()
    parser.add_argument("-d", "--debug", help="Add debugging output", action="store_true")
    parser.add_argument("-p", "--profile", help="Add profile output", action="store_true")
    parser.add_argument("-v", "--verbose", help="Add verbose output", action="store_true")
    parser.add_argument("-s", "--summary",
                        default="2",
                        help="select summary")
    parser.add_argument("filenames", metavar="file", help="existing filename(s)", nargs="+")
    args = parser.parse_args()
    debug = bool(args.debug)
    Verbose = bool(args.verbose)
    if Verbose or debug:
        print(args)
    Summary = parse_summary(args.summary)
    print("Summary: %s" % (Summary,))
    if args.profile:
        import cProfile
        cProfile.run('do_args()')
    else:
        do_args()


if __name__ == "__main__":
    main_program()