#!/usr/bin/env python
# vim: ts=8 sts=4 sw=4 et ai si
# Author: Douglas Clowes 2013 (dcl@ansto.gov.au)
#
# Program to compare what *is* deployed with what *was* deployed.
# It uses the file FILEMAP.TXT which can be created after the deployment.
# The location of FILEMAP.TXT may be different to the deployment path.
#
# For example, to compare what is on echidna (without a FILEMAP.TXT file) with
# what existed at the time it was deployed:
#
#   sshfs -o ro -o follow_symlinks echidna@ics2-echidna.nbi.ansto.gov.au:/usr/local/sics mnt
#   git checkout 5579f3e
#   git checkout merge-replace -- deploySICS.sh
#   ./deploySICS.sh -n echidna localhost
#   python compare_deploy.py --manifest=FILEMAP.TXT mnt/newserver/
#   ...
#   fusermount -u mnt
#
# Notes:
#   "sshfs -o ro" mounts the echidna tree read only
#   the 5579f3e is from the DEPLOYMENT.TXT file on echidna (mnt/server/DEPLOYMENT.TXT)
#   The first checkout is to get what *was* deployed to echidna in the working directory
#   The second checkout is just to get a deploySICS.sh that produces FILEMAP.TXT in staging
#   The "./deploySICS.sh -n" does NOT deploy anywhere but creates FILEMAP.TXT in staging
#   The compare_deploy.py uses FILEMAP.TXT from the staging tree produced by the "./deploySICS.sh -n"
#   The compare_deploy.py path is to what *is* deployed on echidna
#   The untracked files may be manually deployed and missing from FILEMAP.TXT
#   The untracked files may be new, modified, test or junk
#   You can then "gvim -d " then cut/paste lines emitted under "Changed Files:" (or diff)
#
# All output goes through single-argument print(...) calls so that the script
# produces the same text under Python 2.7 and Python 3.
#
import os
import re
import sys
import argparse
import difflib
import shutil
import datetime

# Files whose names end or start with these strings are never put in the
# directory map and are skipped by every report.
IGNORED_SUFFIXES = (".swp", ".swo", ".hdf", "~", ".bck", ".pyc")
IGNORED_PREFIXES = ("core.", "SICServer")

# Name of the manifest file written by the deploy script.
MANIFEST_NAME = "FILEMAP.TXT"

# 'cp -v' output: either `src' -> `dst' / 'src' -> 'dst' (quoted) or src -> dst.
_QUOTED_MAPPING = re.compile(r"^[`'](.*)[`'] -> [`'](.*)[`'].*")
_PLAIN_MAPPING = re.compile(r"^(.*) -> (.*)")

# Per-mode texts used by _stage_files(); the strings are part of the output.
_STAGE_MODES = {
    "link": {
        "heading": "Linking Files:",
        "missing": "Linking",
        "target": "/tmp/link_%s",
        "cannot": "Cannot link: %s to %s",
        "command": "ln -s %s %s",
        "duplicate": "Duplicate Link: %s",
        "summary": "Links (%d) in: %s",
    },
    "copy": {
        "heading": "Copying Files:",
        "missing": "Copying",
        "target": "/tmp/copy_%s",
        "cannot": "Cannot copy: %s to %s",
        "command": "cp -p %s %s",
        "duplicate": "Duplicate Copy: %s",
        "summary": "Copies (%d) in: %s",
    },
}


def load_manifest(theManifest):
    '''
    Load the manifest file ("FILEMAP.TXT").

    The manifest consists of a series of shell variable assignments followed
    by the output of various 'cp -v' commands.  Lines containing "->" are
    treated as file associations, every other line as an assignment.

    Returns a tuple (assignment_map, filemap_map):
      assignment_map maps each shell variable name to its value
      filemap_map maps each destination file (with the TEMPDIR, DESTDIR and
                  "newserver/" prefixes removed) to the source file it came from

    Lines that cannot be parsed are reported on stdout and skipped:
      "Assign:" for a non-blank line with neither "->" nor "="
      "Val:"    for a "->" line that does not look like 'cp -v' output
      "LR+:"    for a destination that is not below "newserver/" (the
                association is still recorded under the remaining path)
    '''
    # rstrip rather than slicing so that a final line without a newline
    # does not lose its last character
    with open(theManifest) as manifest:
        contents = [line.rstrip("\r\n") for line in manifest]

    # divide the lines into assignments and filemaps, parsing the
    # assignments as we go
    assignment_map = {}
    filemaps = []
    for line in contents:
        if "->" in line:
            filemaps.append(line)
            continue
        # partition splits on the first "=" only, so values may contain "="
        left, separator, right = line.partition("=")
        if separator:
            assignment_map[left] = right
        elif line.strip():
            # unexpected, probably an error
            print("Assign: %s" % line)

    tempdir = assignment_map.get("TEMPDIR")
    destdir = assignment_map.get("DESTDIR")
    prefix = "newserver/"

    # parse the file associations: map[destination] = source
    filemap_map = {}
    for line in filemaps:
        pattern = _QUOTED_MAPPING if line[0] in "`'" else _PLAIN_MAPPING
        match = pattern.match(line)
        if match is None:
            # unexpected, probably an error
            print("Val: %s" % line)
            continue
        left, right = match.groups()
        # remove the staging (temporary) directory from the front of the
        # filename and reduce leading slashes to one only
        if tempdir is not None and right.startswith(tempdir):
            right = right[len(tempdir):]
            while right.startswith("//"):
                right = right[1:]
        # remove the destination directory from the front of the filename
        # and remove leading slashes
        if destdir is not None and right.startswith(destdir):
            right = right[len(destdir):].lstrip("/")
        if right.startswith(prefix):
            right = right[len(prefix):]
        else:
            # unexpected, probably an error
            print("LR+: %s %s" % (left, right))
        filemap_map[right] = left
    return (assignment_map, filemap_map)


def _is_ignored(name):
    '''Return True for file names that are excluded from every report.'''
    return name.endswith(IGNORED_SUFFIXES) or name.startswith(IGNORED_PREFIXES)


def load_dir(theDir):
    '''
    Walk the directory tree below theDir and build a double map:
    map[filename][directory relative to theDir] = {}

    The top directory itself is recorded as "".  Files matching
    IGNORED_SUFFIXES or IGNORED_PREFIXES are left out.  theDir may be given
    with or without a trailing path separator.
    '''
    myDirs = {}
    # length of "theDir/" so that slicing yields the path relative to theDir
    if theDir.endswith(os.sep):
        theLen = len(theDir)
    else:
        theLen = len(theDir) + 1
    for root, dirs, files in os.walk(theDir):
        relative = root[theLen:]
        for name in files:
            if _is_ignored(name):
                continue
            myDirs.setdefault(name, {})[relative] = {}
    return myDirs


def _iter_entries(gmap):
    '''Yield (directory, filename) for the directory map in sorted order.'''
    for key in sorted(gmap):
        if _is_ignored(key):
            continue
        for entry in sorted(gmap[key]):
            yield entry, key


def _tracked_targets(gmap, fmap):
    '''Yield the relative path of each file in gmap that the manifest lists.'''
    for entry, key in _iter_entries(gmap):
        target = os.path.join(entry, key)
        if target in fmap:
            yield target


def _read_bytes(path):
    '''Return the raw content of a file, closing it afterwards.'''
    with open(path, "rb") as handle:
        return handle.read()


def _read_text(path):
    '''Return the content of a file as the native str type.'''
    data = _read_bytes(path)
    if not isinstance(data, str):
        # Python 3: keep undecodable bytes visible instead of failing
        data = data.decode("utf-8", "backslashreplace")
    return data


def _both_exist(label, source, destin):
    '''Return True if both files exist, otherwise report what is missing.'''
    have_source = os.path.exists(source)
    have_destin = os.path.exists(destin)
    if have_source and have_destin:
        return True
    print(" %s: %s %s" % (label, source, destin))
    if not have_source:
        print("  %s does not exist" % source)
    if not have_destin:
        print("  %s does not exist" % destin)
    return False


def _stage_files(mode, gmap, fmap, default_dir, source_root, verbose):
    '''
    Link ("link") or copy ("copy") the deployed files into a fresh tree under
    /tmp, laid out by the source-tree paths recorded in the manifest.

    Only files that exist both in the deployed tree and below source_root are
    staged.  Nothing is created when there are no such files.
    '''
    words = _STAGE_MODES[mode]
    print(words["heading"])
    pairs = []
    for target in _tracked_targets(gmap, fmap):
        source = os.path.join(source_root, fmap[target])
        destin = os.path.join(default_dir, target)
        if _both_exist(words["missing"], source, destin):
            pairs.append((destin, fmap[target]))
    if not pairs:
        return

    tgt = words["target"] % str(os.getpid())
    # start from an empty staging directory
    if os.path.exists(tgt):
        shutil.rmtree(tgt)
    os.mkdir(tgt)

    count = 0
    for deployed, relative in pairs:
        source = os.path.abspath(deployed)
        destin = os.path.abspath(os.path.join(tgt, relative))
        # refuse anything that would land outside the staging directory
        if not destin.startswith(tgt + os.sep):
            print(words["cannot"] % (source, destin))
            continue
        parent = os.path.dirname(destin)
        if not os.path.isdir(parent):
            try:
                os.makedirs(parent)
            except EnvironmentError as err:
                print("%s (%s)" % (words["cannot"] % (source, destin), err))
                continue
        if verbose:
            print(words["command"] % (source, destin))
        if os.path.exists(destin):
            # two deployed files came from the same source path; only
            # mention it when their contents differ
            if _read_bytes(source) != _read_bytes(destin):
                print(words["duplicate"] % destin)
            continue
        try:
            if mode == "link":
                os.symlink(source, destin)
            else:
                shutil.copyfile(source, destin)
                shutil.copystat(source, destin)
        except EnvironmentError as err:
            print("%s (%s)" % (words["cannot"] % (source, destin), err))
        else:
            count += 1
    print(words["summary"] % (count, tgt))


def main(argv=None):
    '''
    Command line entry point.

    argv is the list of command line arguments; None means sys.argv[1:].
    '''
    default_dir = "/usr/local/TEST_SICS/taipan/nbi/sics/taipan"
    parser = argparse.ArgumentParser(description = "Compare a deployed instrument tree with a repository")
    parser.add_argument("-v", "--verbose", dest="verbose", action="store_true", help="emit tables for debugging")
    parser.add_argument("--link", dest="link", action="store_true", help="link deployed files to /tmp tree")
    parser.add_argument("--copy", dest="copy", action="store_true", help="copy deployed files to /tmp tree")
    parser.add_argument("--show", dest="show", action="store_true", help="show the differences")
    parser.add_argument("--swap", dest="swap", action="store_true", help="swap the command arguments")
    parser.add_argument("-m", "--manifest", dest="manifest", help="specify another manifest directory")
    parser.add_argument("--command", dest="command", help="specify a command for diffing")
    parser.add_argument("--gitrefer", action="store_true", help="output a gitrefer commandstring")
    parser.add_argument("--gitreferall", action="store_true", help="output a gitrefer commandstring for all files")
    parser.add_argument("path", nargs="?", default = default_dir, help="directory containing FILEMAP.TXT")
    args = parser.parse_args(argv)
    verbose = bool(args.verbose)

    # both the path and --manifest may name the manifest file or its directory
    default_dir = os.path.abspath(args.path)
    if default_dir.endswith(MANIFEST_NAME):
        default_dir = os.path.dirname(default_dir)
    if args.manifest:
        temp_dir = args.manifest
        if temp_dir.endswith(MANIFEST_NAME):
            temp_dir = os.path.dirname(temp_dir)
        default_manifest = os.path.join(temp_dir, MANIFEST_NAME)
    else:
        default_manifest = os.path.join(default_dir, MANIFEST_NAME)
    root_dir = os.path.dirname(default_dir)
    print("path: %s" % (args.path,))
    print("dest: %s" % (default_dir,))
    print("root: %s" % (root_dir,))
    print("--manifest: %s" % (args.manifest,))
    print("Manifest: %s" % (default_manifest,))

    amap, fmap = load_manifest(default_manifest)
    if verbose:
        print("Assignments:")
        for key in sorted(amap):
            print("  %s = %s" % (key, amap[key]))
        print("Contents:")
        for key in sorted(fmap):
            print("  %s = %s" % (key, fmap[key]))

    gmap = load_dir(default_dir)
    if verbose:
        print("Target:")
        for key in sorted(gmap):
            print("  %s" % key)
            for entry in sorted(gmap[key]):
                print("  %s" % entry)

    print("Untracked Files:")
    for entry, key in _iter_entries(gmap):
        # data and log directories are expected to hold untracked files
        if entry.endswith(("/data", "/log")):
            continue
        if "/data/" in entry:
            continue
        target = os.path.join(entry, key)
        if target not in fmap:
            print(os.path.join(default_dir, target))

    print("Changed Files:")
    # manifest sources are relative to the directory the script is run from
    source_root = os.getenv("PWD") or os.getcwd()
    sigma_list = []
    print_list = {}
    for target in _tracked_targets(gmap, fmap):
        source = os.path.join(source_root, fmap[target])
        destin = os.path.join(default_dir, target)
        if os.path.exists(destin):
            sigma_list.append(destin)
        if not _both_exist("Compare", source, destin):
            continue
        # unified_diff requires the dates as strings
        source_date = str(datetime.datetime.fromtimestamp(os.path.getmtime(source)))
        destin_date = str(datetime.datetime.fromtimestamp(os.path.getmtime(destin)))
        delta = list(difflib.unified_diff(
            _read_text(source).splitlines(),
            _read_text(destin).splitlines(),
            fromfile=source,
            tofile=destin,
            fromfiledate=source_date,
            tofiledate=destin_date,
            lineterm=""))
        if delta:
            print_list[destin] = (source, delta)

    delta_list = sorted(print_list)
    for destin in delta_list:
        source, delta = print_list[destin]
        if args.command:
            if args.swap:
                print("%s %s %s" % (args.command, destin, source))
            else:
                print("%s %s %s" % (args.command, source, destin))
        else:
            print("%s %s #%d" % (source, destin, len(delta)))
        if args.show:
            for line in delta:
                print(line)
    print("Changed file count: %d/%d" % (len(delta_list), len(sigma_list)))

    if args.gitreferall:
        print(" ".join(["gitrefer"] + sigma_list))
    if args.gitrefer:
        print(" ".join(["gitrefer"] + delta_list))

    if args.link:
        _stage_files("link", gmap, fmap, default_dir, source_root, verbose)
    if args.copy:
        _stage_files("copy", gmap, fmap, default_dir, source_root, verbose)


if __name__ == "__main__":
    main()