Create the gitloader and gitrefer progs and tie in with compareSICS

This commit is contained in:
Douglas Clowes
2014-06-05 11:36:29 +10:00
parent 49361e3430
commit 1f3aa6676e
3 changed files with 273 additions and 4 deletions

View File

@ -36,6 +36,7 @@ import sys
import argparse
import difflib
import shutil
import datetime
def load_manifest(theManifest):
'''
@ -131,7 +132,12 @@ if __name__ == "__main__":
parser.add_argument("-v", "--verbose", dest="verbose", action="store_true", help="emit tables for debugging")
parser.add_argument("--link", dest="link", action="store_true", help="link deployed files to /tmp tree")
parser.add_argument("--copy", dest="copy", action="store_true", help="copy deployed files to /tmp tree")
parser.add_argument("--show", dest="show", action="store_true", help="show the differences")
parser.add_argument("--swap", dest="swap", action="store_true", help="swap the command arguments")
parser.add_argument("-m", "--manifest", dest="manifest", help="specify another manifest directory")
parser.add_argument("--command", dest="command", help="specify a command for diffing")
parser.add_argument("--gitrefer", action="store_true", help="output a gitrefer commandstring")
parser.add_argument("--gitreferall", action="store_true", help="output a gitrefer commandstring for all files")
parser.add_argument("path", nargs="?", default = default_dir, help="directory containing FILEMAP.TXT")
args = parser.parse_args()
if args.verbose:
@ -189,6 +195,9 @@ if __name__ == "__main__":
print os.path.join(default_dir, os.path.join(entry, key))
print "Changed Files:"
delta_list = []
sigma_list = []
print_list = {}
for key in sorted(gmap):
if key.endswith((".swp", ".swo", ".hdf", "~", ".bck", ".pyc")):
continue
@ -201,6 +210,8 @@ if __name__ == "__main__":
source = os.path.join(amap["SRCDIR"], fmap[target])
source = os.path.join(os.getenv("PWD"), fmap[target])
destin = os.path.join(default_dir, target)
if os.path.exists(destin):
sigma_list.append(destin)
if not (os.path.exists(source) and os.path.exists(destin)):
print " Compare:", source, destin
if not os.path.exists(source):
@ -208,12 +219,45 @@ if __name__ == "__main__":
if not os.path.exists(destin):
print " ", destin, "does not exist"
continue
delta = list(difflib.unified_diff(open(source).readlines(), open(destin).readlines()))
delta = list(difflib.unified_diff(\
open(source).read().splitlines(),\
open(destin).read().splitlines(),\
fromfile=source,\
tofile=destin,\
fromfiledate=datetime.datetime.fromtimestamp(os.path.getmtime(source)),\
tofiledate=datetime.datetime.fromtimestamp(os.path.getmtime(destin)),\
lineterm=""))
delta_len = len(delta)
if delta_len > 0:
if verbose:
print delta
print source, destin, "#%d" % delta_len
print_list[destin] = (source, list(delta))
for destin in sorted(print_list.keys()):
delta_list.append(destin)
source, delta = print_list[destin]
if args.command:
if args.swap:
print args.command, destin, source
else:
print args.command, source, destin
else:
print source, destin, "#%d" % len(delta)
if args.show:
for line in delta:
print line
print "Changed file count:", len(delta_list)
if args.gitreferall:
print "gitrefer",
for filename in sigma_list:
print filename,
print
if args.gitrefer:
print "gitrefer",
for filename in delta_list:
print filename,
print
if args.link:
print "Linking Files:"

View File

@ -0,0 +1,66 @@
#!/usr/bin/python
# vim: tabstop=8 softtabstop=4 shiftwidth=4 nocindent smartindent
import os, sys
import sqlite3
import subprocess
import shlex
import traceback
def setup_database():
    """Start a fresh cross-reference database.

    Deletes any existing ``gitxref.sqlite`` in the current directory, opens
    a new one and creates the empty ``commit_tab`` and ``blob_tab`` tables.
    The connection and cursor are published as the module globals ``conn``
    and ``curs``.
    """
    global conn, curs
    db_path = "gitxref.sqlite"
    # Always rebuild from scratch: discard whatever an earlier run left.
    if os.path.exists(db_path):
        os.remove(db_path)
    conn = sqlite3.connect(db_path)
    curs = conn.cursor()
    table_statements = (
        "CREATE TABLE commit_tab (commit_hash TEXT, timestamp INTEGER, message TEXT)",
        "CREATE TABLE blob_tab (commit_hash TEXT, blob_hash TEXT, blob_name TEXT)",
    )
    for statement in table_statements:
        curs.execute(statement)
def _git_output_lines(argv):
    """Run a git command (argument list) and return its stdout as lines.

    A non-zero exit status is reported on stderr instead of being silently
    discarded; whatever was written to stdout is still returned.
    """
    proc = subprocess.Popen(argv,
                            stdout=subprocess.PIPE,
                            stderr=subprocess.PIPE)
    out_txt, err_txt = proc.communicate()
    if proc.returncode != 0:
        sys.stderr.write("%s exited with status %d: %r\n"
                         % (" ".join(argv), proc.returncode, err_txt))
    return out_txt.splitlines()


def populate_data():
    """Load every commit reachable from ``args.branch`` into the database.

    For each line of ``git log`` one row is inserted into ``commit_tab``,
    and for each entry of ``git ls-tree -r`` on that commit's tree one row
    is inserted into ``blob_tab``.  Uses the module globals ``conn``,
    ``curs`` and ``args``.

    Raises:
        ValueError: if a line of git output does not have the expected
            number of fields.
    """
    global conn, curs
    # Arguments are passed as a list rather than a shlex-split string, so a
    # branch argument containing spaces or quotes reaches git unchanged.
    log_argv = ["git", "log", "--pretty=format:%T %H %at %s", args.branch]
    for log_line in _git_output_lines(log_argv):
        # maxsplit=3 keeps the commit subject (which contains spaces) whole.
        tree_hash, commit_hash, timestamp, message = log_line.split(" ", 3)
        # The subject is stored as repr(message), exactly as before;
        # presumably to keep awkward bytes representable -- confirm.
        curs.execute("insert into commit_tab values (?, ?, ?)",
                     (commit_hash, timestamp, repr(message)))
        for line in _git_output_lines(["git", "ls-tree", "-r", tree_hash]):
            # Fields: mode, object type, object hash, path.
            _mode, _kind, blob_hash, fname = line.split(None, 3)
            curs.execute("insert into blob_tab values (?, ?, ?)",
                         (commit_hash, blob_hash, fname))
    # Make the inserted rows durable explicitly rather than relying on an
    # implicit commit triggered by a later DDL statement.
    conn.commit()
def create_indexes():
    """Create the lookup indexes on ``commit_tab`` and ``blob_tab``.

    Executes the statements through the module-global cursor ``curs``.
    """
    global conn, curs
    index_statements = (
        "CREATE INDEX commit_commit on commit_tab (commit_hash)",
        "CREATE INDEX commit_blob on blob_tab (commit_hash)",
        "CREATE INDEX blob_blob on blob_tab (blob_hash)",
    )
    for statement in index_statements:
        curs.execute(statement)
def main_program():
    """Build the cross-reference database: create, populate, then index it.

    The work is committed explicitly and the connection is always closed.
    Older sqlite3 modules committed implicitly before DDL statements; newer
    ones do not, so without the explicit commit the inserted rows could be
    rolled back when the process exits.
    """
    setup_database()
    # setup_database() publishes the open connection as the global ``conn``.
    try:
        populate_data()
        create_indexes()
        conn.commit()
    finally:
        conn.close()
if __name__ == "__main__":
    import argparse
    import cProfile

    # ``args`` is deliberately a module-level name: populate_data() reads
    # args.branch from it.
    parser = argparse.ArgumentParser()
    for flags, options in (
        (("-b", "--branch"),
         dict(default="HEAD", help="load for branch [HEAD]")),
        (("-d", "--debug"),
         dict(action="store_true", help="debugging output")),
        (("-p", "--profile"),
         dict(action="store_true", help="profile output")),
    ):
        parser.add_argument(*flags, **options)
    args = parser.parse_args()

    if not args.profile:
        main_program()
    else:
        # Same run, wrapped in the profiler; the report goes to stdout.
        cProfile.run('main_program()')

View File

@ -0,0 +1,159 @@
#!/usr/bin/python
# vim: tabstop=8 softtabstop=4 shiftwidth=4 nocindent smartindent
import os, sys
import sqlite3
import datetime
import subprocess
import shlex
import argparse
import string
def get_hash_from_file(filename):
    """Return the git object hash of a file, as reported by git.

    Runs ``git hash-object`` on ``filename`` and returns the first line of
    its output with surrounding whitespace removed.  Reads the module
    globals ``debug`` (echo git's stdout/stderr) and ``Verbose`` (echo the
    resulting hash).

    Raises:
        IndexError: if git printed no hash (for example, the file does not
            exist).  The exception type is unchanged from the earlier
            behaviour, but the message now names the file.
    """
    global debug
    # Pass an argument list instead of shlex-splitting a concatenated
    # string, so names containing spaces, quotes or backslashes survive.
    # "--" stops a name that starts with "-" being read as an option.
    argv = ["git", "hash-object", "--", filename]
    proc = subprocess.Popen(argv,
                            stdout=subprocess.PIPE,
                            stderr=subprocess.PIPE)
    hashtxt, err = proc.communicate()
    hashtxt = hashtxt.splitlines()
    err = err.splitlines()
    if debug:
        for line in hashtxt:
            print(" Txt: %s" % line)
        for line in err:
            print(" Err: %s" % line)
    if not hashtxt:
        raise IndexError("git hash-object returned no hash for %r: %r"
                         % (filename, err))
    obj_hash = hashtxt[0].strip()
    if Verbose:
        print("Hash: %s %s" % (obj_hash, filename))
    return obj_hash
def do_one_way(the_hashes):
    """Look up blob hashes in gitxref.sqlite and print the selected summaries.

    For every hash in ``the_hashes`` the rows of ``blob_tab`` joined with
    ``commit_tab`` are fetched in timestamp order and collected into:

    * ``blob_map``:   blob_hash -> (blob_name, set of commit hashes)
    * ``commit_map``: commit_hash -> (timestamp, message, set of blob hashes)

    ``commit_union`` is then the set of commits that contain every blob
    found; when that is empty it is rebuilt from the commits holding the
    most matched blobs.  Which reports are printed is controlled by the
    module global ``Summary``; ``Verbose`` adds per-row output and
    ``hash_map`` (hash -> filename) is read for summary 5.
    """
    global conn
    global curs
    conn = sqlite3.connect("gitxref.sqlite")
    curs = conn.cursor()
    blob_map = {}
    commit_map = {}
    select = "select blob_hash, blob_name, c.commit_hash, timestamp, message "+\
        "from blob_tab b, commit_tab c "+\
        "where b.blob_hash == :1 and b.commit_hash == c.commit_hash "+\
        "order by timestamp"
    for one_hash in the_hashes:
        curs.execute(select, (one_hash,))
        old_hash = None
        for blob_hash, blob_name, commit_hash, timestamp, message in curs.fetchall():
            # Announce each blob once, when its hash first appears in the rows.
            if blob_hash != old_hash:
                if Verbose:
                    print "File:", blob_hash, blob_name
                old_hash = blob_hash
            # The recorded name is the one from the first row seen for this blob.
            if blob_hash not in blob_map:
                blob_map[blob_hash] = (blob_name, set())
            blob_map[blob_hash][1].add(commit_hash)
            if commit_hash not in commit_map:
                commit_map[commit_hash] = (timestamp, message, set())
            commit_map[commit_hash][2].add(blob_hash)
            # The truncation and conversion below only affect the verbose
            # line; commit_map already holds the untouched values.
            if len(message) > 60:
                message = message[:57] + "..."
            timestamp = datetime.datetime.fromtimestamp(timestamp)
            if Verbose:
                print " Commit:", commit_hash, timestamp, message
    # Start from every commit seen, then keep only those containing all blobs.
    commit_union = set(commit_map.keys())
    # NOTE(review): junk is never read after this assignment.
    junk = sorted(commit_map.keys(),\
        key=lambda x: (len(commit_map[x][2]), -commit_map[x][0]),\
        reverse=True)
    for blob_key in blob_map.keys():
        blob_name, commit_set = blob_map[blob_key]
        commit_union.intersection_update(commit_set)
    if len(commit_union) == 0:
        # No single commit holds every blob: fall back to the commits with
        # the most matched blobs.
        print "Recalculating commit_map"
        for commit_key in sorted(commit_map.keys(), key=lambda x: len(commit_map[x][2]), reverse=True):
            timestamp, message, blobs = commit_map[commit_key]
            # First iteration only: the largest commit's blobs become the reference set.
            if len(commit_union) == 0:
                blob_union = set(blobs)
            # Stop at the first commit that lacks any blob of the reference set.
            if len(blob_union.intersection(blobs)) < len(blob_union):
                break;
            commit_union.add(commit_key)
    if 1 in Summary:
        print "Summary1: Files with Commits"
        for blob_key in blob_map.keys():
            blob_name, commit_set = blob_map[blob_key]
            print blob_key +":", blob_name, len(commit_set)
    if 2 in Summary:
        print "Summary2: Commits with most files"
        # Only the commits in commit_union, oldest first.
        for commit_key in sorted(commit_union, key=lambda x: commit_map[x][0]):
            timestamp, message, blobs = commit_map[commit_key]
            timestamp = datetime.datetime.fromtimestamp(timestamp)
            if len(message) > 50:
                message = message[:47] + "..."
            print " Commit:", commit_key, timestamp, len(blobs), message
    if 3 in Summary:
        print "Summary3: Commits with files"
        # Every commit that matched at least one blob, oldest first.
        for commit_key in sorted(commit_map.keys(), key=lambda x: commit_map[x][0]):
            timestamp, message, blobs = commit_map[commit_key]
            timestamp = datetime.datetime.fromtimestamp(timestamp)
            if len(message) > 60:
                message = message[:57] + "..."
            print " Commit:", commit_key, timestamp, len(blobs), message
    if 4 in Summary:
        print "Summary4: Files not in Summary 2"
        # Blobs none of whose commits made it into commit_union.
        for blob_key in blob_map.keys():
            blob_name, commit_set = blob_map[blob_key]
            if len(commit_union.intersection(commit_set)) == 0:
                print "\n"+blob_key +":", blob_name, len(commit_set), commit_set
    if 5 in Summary:
        print "Summary5: Files without Commits"
        # Hashes computed from files that matched no row in the database.
        for hash_key in [x for x in hash_map.keys() if x not in blob_map]:
            print hash_key, hash_map[hash_key]
def do_args():
    """Turn the command-line arguments into hashes and report on them.

    Each entry of ``args.filenames`` is classified as follows:

    * it contains a '.': treated as a file and hashed with
      get_hash_from_file();
    * it consists only of hex digits: used directly as a hash;
    * otherwise, if it names an existing file (e.g. ``Makefile``): hashed
      as a file;
    * anything else is reported on stderr and skipped.

    The first two rules are the original behaviour; the last two replace
    silently dropping the argument.  File hashes are recorded in the module
    global ``hash_map`` (hash -> filename) and all hashes are passed to
    do_one_way().
    """
    global hash_map, args
    the_hashes = []
    hash_map = {}
    for filename in args.filenames:
        is_hex = all(a in string.hexdigits for a in filename)
        if '.' in filename or (not is_hex and os.path.isfile(filename)):
            one_hash = get_hash_from_file(filename)
            hash_map[one_hash] = filename
            the_hashes.append(one_hash)
        elif is_hex:
            the_hashes.append(filename)
        else:
            sys.stderr.write("Ignoring %r: not an existing file or a hash\n"
                             % filename)
    do_one_way(the_hashes)
def main_program():
    """Parse the command line, set the option globals and run the report.

    Sets the module globals ``args``, ``debug``, ``Verbose`` and
    ``Summary``.  ``--summary`` takes a comma-separated list of report
    numbers and ``lo-hi`` ranges (e.g. ``1,3-5``); entries that are not
    plain digits are ignored.  The work itself is done by do_args(),
    optionally under cProfile.
    """
    global debug, Verbose, Summary
    global hash_map, args
    parser = argparse.ArgumentParser()
    parser.add_argument("-d", "--debug", help="Add debugging output", action="store_true")
    parser.add_argument("-p", "--profile", help="Add profile output", action="store_true")
    parser.add_argument("-v", "--verbose", help="Add verbose output", action="store_true")
    parser.add_argument("-s", "--summary",
                        default="2",
                        help="select summary")
    parser.add_argument("filenames", metavar="file", help="existing filename(s)", nargs="+")
    args = parser.parse_args()
    debug = bool(args.debug)
    Verbose = bool(args.verbose)
    if Verbose or debug:
        print(args)
    Summary = set()
    for rng in [x.strip() for x in args.summary.split(',')]:
        if '-' in rng:
            # Split once only, so "1-2-3" is ignored like any other
            # malformed entry instead of raising on unpacking.
            lo, hi = rng.split('-', 1)
            if lo.isdigit() and hi.isdigit():
                # update(), not add(): each number of the range must be an
                # element of Summary so that tests like "2 in Summary" work.
                Summary.update(range(int(lo), int(hi) + 1))
        elif rng.isdigit():
            Summary.add(int(rng))
    print("Summary: %s" % (Summary,))
    if args.profile:
        import cProfile
        cProfile.run('do_args()')
    else:
        do_args()
if __name__ == "__main__":
    # Run the report only when executed as a script, not when imported.
    main_program()