Create the gitloader and gitrefer progs and tie in with compareSICS

2014-06-05 11:36:29 +10:00
parent 49361e3430
commit 1f3aa6676e
3 changed files with 273 additions and 4 deletions
--- a/site_ansto/instrument/util/gitloader.py
+++ b/site_ansto/instrument/util/gitloader.py
@@ -0,0 +1,66 @@
+#!/usr/bin/python
+# vim: tabstop=8  softtabstop=4  shiftwidth=4  nocindent  smartindent
+import os, sys
+import sqlite3
+import subprocess
+import shlex
+import traceback
+
+def setup_database():
+    """Create a database to hold the data"""
+    global conn, curs
+    if os.path.exists("gitxref.sqlite"):
+        os.remove("gitxref.sqlite")
+    conn = sqlite3.connect("gitxref.sqlite")
+    curs = conn.cursor()
+    curs.execute("CREATE TABLE commit_tab (commit_hash TEXT, timestamp INTEGER, message TEXT)")
+    curs.execute("CREATE TABLE blob_tab (commit_hash TEXT, blob_hash TEXT, blob_name TEXT)")
+
+def populate_data():
+    global conn, curs
+    cmd = 'git log --pretty=format:"%T %H %at %s" ' + args.branch
+    log_txt, err = subprocess.Popen(shlex.split(cmd),\
+            stdout=subprocess.PIPE,\
+            stderr=subprocess.PIPE).communicate()
+    log_txt = log_txt.splitlines()
+    err = err.splitlines()
+    for log_line in log_txt:
+        tree_hash, commit_hash, timestamp, message = log_line.split(" ", 3)
+        curs.execute("insert into commit_tab values (:1, :2, :3)",\
+            (commit_hash, timestamp, repr(message)))
+
+        cmd = "git ls-tree -r " + tree_hash
+        ls_txt, err = subprocess.Popen(shlex.split(cmd),\
+                stdout=subprocess.PIPE,\
+                stderr=subprocess.PIPE).communicate()
+        ls_txt = ls_txt.splitlines()
+        err = err.splitlines()
+        for line in ls_txt:
+            a, b, blob_hash, fname = line.split(None, 3)
+            curs.execute("insert into blob_tab values (:1, :2, :3)",\
+                (commit_hash, blob_hash, fname))
+
+def create_indexes():
+    global conn, curs
+    curs.execute("CREATE INDEX commit_commit on commit_tab (commit_hash)")
+    curs.execute("CREATE INDEX commit_blob on blob_tab (commit_hash)")
+    curs.execute("CREATE INDEX blob_blob on blob_tab (blob_hash)")
+
+def main_program():
+    setup_database()
+    populate_data()
+    create_indexes()
+
+if __name__ == "__main__":
+    global args
+    import argparse
+    import cProfile
+    parser = argparse.ArgumentParser()
+    parser.add_argument("-b", "--branch", default="HEAD", help="load for branch [HEAD]")
+    parser.add_argument("-d", "--debug", action="store_true", help="debugging output")
+    parser.add_argument("-p", "--profile", action="store_true", help="profile output")
+    args = parser.parse_args()
+    if args.profile:
+        cProfile.run('main_program()')
+    else:
+        main_program()
--- a/site_ansto/instrument/util/gitrefer.py
+++ b/site_ansto/instrument/util/gitrefer.py
@@ -0,0 +1,159 @@
+#!/usr/bin/python
+# vim: tabstop=8  softtabstop=4  shiftwidth=4  nocindent  smartindent
+import os, sys
+import sqlite3
+import datetime
+import subprocess
+import shlex
+import argparse
+import string
+
+def get_hash_from_file(filename):
+    """Generate the git hash of a file"""
+    global debug
+    cmd = "git hash-object " + filename
+    hashtxt, err = subprocess.Popen(shlex.split(cmd), stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate()
+    hashtxt = hashtxt.splitlines()
+    err = err.splitlines()
+    if debug:
+        for line in hashtxt:
+            print "  Txt: %s" % line
+        for line in err:
+            print "  Err: %s" % line
+    obj_hash = hashtxt[0].strip()
+    if Verbose:
+        print "Hash:", obj_hash, filename
+    return obj_hash
+
+def do_one_way(the_hashes):
+    global conn
+    global curs
+    conn = sqlite3.connect("gitxref.sqlite")
+    curs = conn.cursor()
+    blob_map = {}
+    commit_map = {}
+
+    select = "select blob_hash, blob_name, c.commit_hash, timestamp, message "+\
+            "from blob_tab b, commit_tab c "+\
+            "where b.blob_hash == :1 and b.commit_hash == c.commit_hash "+\
+            "order by timestamp"
+    for one_hash in the_hashes:
+        curs.execute(select, (one_hash,))
+        old_hash = None
+        for blob_hash, blob_name, commit_hash, timestamp, message in  curs.fetchall():
+            if blob_hash != old_hash:
+                if Verbose:
+                    print "File:", blob_hash, blob_name
+                old_hash = blob_hash
+            if blob_hash not in blob_map:
+                blob_map[blob_hash] = (blob_name, set())
+            blob_map[blob_hash][1].add(commit_hash)
+            if commit_hash not in commit_map:
+                commit_map[commit_hash] = (timestamp, message, set())
+            commit_map[commit_hash][2].add(blob_hash)
+            if len(message) > 60:
+                message = message[:57] + "..."
+            timestamp = datetime.datetime.fromtimestamp(timestamp)
+            if Verbose:
+                print "  Commit:", commit_hash, timestamp, message
+    commit_union = set(commit_map.keys())
+    junk = sorted(commit_map.keys(),\
+        key=lambda x: (len(commit_map[x][2]), -commit_map[x][0]),\
+        reverse=True)
+    for blob_key in blob_map.keys():
+        blob_name, commit_set = blob_map[blob_key]
+        commit_union.intersection_update(commit_set)
+    if len(commit_union) == 0:
+        print "Recalculating commit_map"
+        for commit_key in sorted(commit_map.keys(), key=lambda x: len(commit_map[x][2]), reverse=True):
+            timestamp, message, blobs = commit_map[commit_key]
+            if len(commit_union) == 0:
+                blob_union = set(blobs)
+            if len(blob_union.intersection(blobs)) < len(blob_union):
+                break;
+            commit_union.add(commit_key)
+    if 1 in Summary:
+        print "Summary1: Files with Commits"
+        for blob_key in blob_map.keys():
+            blob_name, commit_set = blob_map[blob_key]
+            print blob_key +":", blob_name, len(commit_set)
+    if 2 in Summary:
+        print "Summary2: Commits with most files"
+        for commit_key in sorted(commit_union, key=lambda x: commit_map[x][0]):
+            timestamp, message, blobs = commit_map[commit_key]
+            timestamp = datetime.datetime.fromtimestamp(timestamp)
+            if len(message) > 50:
+                message = message[:47] + "..."
+            print "  Commit:", commit_key, timestamp, len(blobs), message
+    if 3 in Summary:
+        print "Summary3: Commits with files"
+        for commit_key in sorted(commit_map.keys(), key=lambda x: commit_map[x][0]):
+            timestamp, message, blobs = commit_map[commit_key]
+            timestamp = datetime.datetime.fromtimestamp(timestamp)
+            if len(message) > 60:
+                message = message[:57] + "..."
+            print "  Commit:", commit_key, timestamp, len(blobs), message
+    if 4 in Summary:
+        print "Summary4: Files not in Summary 2"
+        for blob_key in blob_map.keys():
+            blob_name, commit_set = blob_map[blob_key]
+            if len(commit_union.intersection(commit_set)) == 0:
+                print "\n"+blob_key +":", blob_name, len(commit_set), commit_set
+    if 5 in Summary:
+        print "Summary5: Files without Commits"
+        for hash_key in [x for x in hash_map.keys() if x not in blob_map]:
+            print hash_key, hash_map[hash_key]
+
+def do_args():
+    global hash_map, args
+    the_hashes = []
+    hash_map = {}
+    for filename in args.filenames:
+        if '.' in filename:
+            one_hash = get_hash_from_file(filename)
+            hash_map[one_hash] = filename
+            the_hashes.append(one_hash)
+        elif all(a in string.hexdigits for a in filename):
+            the_hashes.append(filename)
+    do_one_way(the_hashes)
+
+def main_program():
+    global debug, Verbose, Summary
+    global hash_map, args
+    parser = argparse.ArgumentParser()
+    parser.add_argument("-d", "--debug", help="Add debugging output", action="store_true")
+    parser.add_argument("-p", "--profile", help="Add profile output", action="store_true")
+    parser.add_argument("-v", "--verbose", help="Add verbose output", action="store_true")
+    parser.add_argument("-s", "--summary", \
+        default = "2", \
+        help="select summary")
+    parser.add_argument("filenames", metavar="file", help="existing filename(s)", nargs="+")
+    args = parser.parse_args()
+    if args.debug:
+        debug = True
+    else:
+        debug = False
+    if args.verbose:
+        Verbose = True
+    else:
+        Verbose = False
+    if Verbose or debug:
+        print args
+    Summary = set()
+    for rng in [x.strip() for x in args.summary.split(',')]:
+        if '-' in rng:
+            lo, hi = rng.split('-')
+            if lo.isdigit() and hi.isdigit():
+                Summary.add(range(int(lo), int(hi)+1))
+        else:
+            if rng.isdigit():
+                Summary.add(int(rng))
+    print "Summary:", Summary
+    if args.profile:
+        import cProfile
+        cProfile.run('do_args()')
+    else:
+        do_args()
+
+if __name__ == "__main__":
+    main_program();