"""Compress and expand per-variable log directories of sea instruments.

Directory layout used by this script (all below ``~/sea/<instrument>/``):

* ``logger/<year>/<variable>/*.log``  -- the plain log files
* ``gzlogger/<year>/<variable>.gz``   -- one archive per variable and year

Archive format ("ZCAT"): a gzip stream starting with the magic ``ZCAT``,
followed, for every member file, by a header ``"\\n<size> <filename>\\n"`` and
then exactly ``<size>`` bytes of file content.

Command line options:

* ``-x``  expand archives instead of compressing (implies no cleanup)
* ``-c``  skip the cleanup of unwanted variable directories
* ``-n``  no compress/expand action (cleanup only)
* ``-i``  following arguments are instruments (this is the default)
* ``-v``  following arguments are variables
* ``-y``  following arguments are years
* ``-p``  treat all years except the current one (default)
* ``-a``  treat all years
"""
import gzip
from glob import glob
import re
import os
import sys
import shutil
import time

COMPRESS = 1
EXPAND = 2

# magic bytes at the very start of every archive
_MAGIC = b'ZCAT'
# a valid log file starts with a '#20yy-mm-dd ' header
_LOG_START = re.compile(br'#20\d\d-\d\d-\d\d ')

# glob patterns (relative to logger/<year>/) of directories removed on cleanup
REMOVEDIRS = (
    'device.force_status_save',
    'nv.autoflow.*buf',
    'nv.autoflow.*min',
    'nv.autoflow.*max',
    'table.val_*',
    '*.*.stddev',
    'device.ccu4version',
    'cc.f',
    'cc.nfb',
    'cc.hfb',
    'graph_*',
)


def zcat_compress(gzfile, files):
    """Pack the given log files into the ZCAT archive *gzfile*.

    Files whose content does not start with a ``#20yy-mm-dd `` header are not
    archived; they are renamed to ``<filename>.bad`` instead.

    The archive is first written to ``gzfile + 'z'`` and renamed to *gzfile*
    only after it has been written completely.

    :param gzfile: path of the archive to create
    :param files: names of the files to pack (stored in the archive as given)
    """
    tmpfile = gzfile + 'z'
    n = 0
    # everything is handled as bytes, so the sizes in the headers are byte counts
    with gzip.open(tmpfile, 'wb') as out:
        out.write(_MAGIC)
        for filename in files:
            with open(filename, 'rb') as f:
                content = f.read()
            if _LOG_START.match(content) is None:
                print('%s is not a sea log file' % filename)
                os.rename(filename, filename + ".bad")
                continue
            header = "\n%d %s\n" % (len(content), filename)
            out.write(header.encode('utf-8'))
            out.write(content)
            n += 1
    os.rename(tmpfile, gzfile)
    print("%d files compressed to %s" % (n, gzfile))


def zcat_expand(gzfile):
    """Extract the members of the ZCAT archive *gzfile* into the current directory.

    A member whose file already exists is never overwritten; a message is
    printed when the existing content differs from the archived content.

    :param gzfile: path of the archive to read
    :raises ValueError: if the archive does not start with the ZCAT magic or
        a member is shorter than announced in its header
    """
    n = 0  # files written
    m = 0  # files already present
    with gzip.open(gzfile, 'rb') as f:
        if not f.readline().startswith(_MAGIC):
            raise ValueError('%s is not a ZCAT archive' % gzfile)
        line = f.readline()
        while line:
            # header line is "<size> <filename>\n"
            size, filename = line[:-1].split(b' ', 1)
            size = int(size)
            filename = filename.decode('utf-8')
            content = f.read(size)
            if len(content) != size:
                raise ValueError('%s is truncated in %s' % (filename, gzfile))
            try:
                with open(filename, 'rb') as g:
                    existing = g.read()
            except IOError:
                existing = None
            if existing is None:
                with open(filename, 'wb') as g:
                    g.write(content)
                n += 1
            else:
                if existing != content:
                    print('%s does not match' % filename)
                m += 1
            f.readline()  # skip the newline separating content from the next header
            line = f.readline()
    print("%s extracted from %s, (%d files already there)" % (n, gzfile, m))


def instdir(instrument=''):
    """Return the base directory (with trailing slash) of *instrument*.

    With an empty instrument name this is ``~/sea/``.
    """
    subdir = instrument + '/' if instrument != '' else ''
    return '%s/sea/%s' % (os.path.expanduser("~"), subdir)


def compress(logdir, gzdir, variable):
    """Archive all ``*.log`` files of one variable into ``<gzdir><variable>.gz``.

    Changes the current working directory to ``logdir + variable``.

    :param logdir: directory of one year of logs, with trailing slash
    :param gzdir: directory of one year of archives, with trailing slash
    :param variable: name of the variable (sub directory of *logdir*)
    """
    os.chdir(logdir + variable)
    if not os.path.isdir(gzdir):
        os.makedirs(gzdir)
    gzfile = '%s%s.gz' % (gzdir, variable)
    if os.path.isfile(gzfile):
        # expand the already compressed files first, in order not to lose them
        expand(logdir, gzdir, variable)
    files = sorted(glob('*.log'))
    if files:
        zcat_compress(gzfile, files)


def expand(logdir, gzdir, variable):
    """Extract ``<gzdir><variable>.gz`` into ``logdir + variable``.

    The target directory is created when missing. Changes the current working
    directory to ``logdir + variable``.

    :param logdir: directory of one year of logs, with trailing slash
    :param gzdir: directory of one year of archives, with trailing slash
    :param variable: name of the variable (sub directory of *logdir*)
    """
    target = logdir + variable
    if not os.path.isdir(target):
        os.makedirs(target)
    os.chdir(target)
    gzfile = '%s%s.gz' % (gzdir, variable)
    zcat_expand(gzfile)


def treat(action, instrument='', years='past', vars=None):
    """Compress or expand the logs of one instrument.

    Changes the current working directory.

    :param action: COMPRESS or EXPAND
    :param instrument: instrument name ('' for the directory directly in ~/sea/)
    :param years: 'all' (every ``20*`` directory), 'past' (the same, without
        the current year) or an explicit list of year names
    :param vars: list of variable names; when empty or None, all entries of
        the year's log directory are treated
    """
    logbase = '%slogger/' % instdir(instrument)
    gzbase = '%sgzlogger/' % instdir(instrument)
    os.chdir(logbase)
    print('chdir', logbase, gzbase)
    if years == 'all':
        yrs = glob('20*')
    elif years == 'past':
        yrs = glob('20*')
        try:
            yrs.remove(time.strftime("%Y"))
        except ValueError:
            pass  # no directory for the current year
    else:
        yrs = years
    for year in yrs:
        logdir = logbase + year + '/'
        gzdir = gzbase + year + '/'
        os.chdir(logdir)
        varlist = vars if vars else glob('*')
        for variable in varlist:
            gzfile = gzdir + variable + '.gz'
            if action == COMPRESS:
                # only directories which are not yet archived
                if os.path.isdir(logdir + variable) and not os.path.isfile(gzfile):
                    t0 = time.time()
                    compress(logdir, gzdir, variable)
                    time.sleep(min(1.0, time.time() - t0))  # be nice
            elif action == EXPAND:
                # the directory normally exists already when the variable list
                # was taken from the log directory itself
                if not os.path.isdir(logdir + variable):
                    os.makedirs(logdir + variable)
                print(gzdir, variable, '.gz')
                if os.path.isfile(gzfile):
                    expand(logdir, gzdir, variable)
                else:
                    print(gzfile, 'is missing')


def _cleanup(instrument):
    """Remove all directories of *instrument* matching one of REMOVEDIRS."""
    for pat in REMOVEDIRS:
        print('remove', pat)
        gap = 0  # number of directories removed without reporting them
        t0 = time.time()
        t1 = t0
        for path in glob(instdir(instrument) + 'logger/*/' + pat):
            shutil.rmtree(path)
            now = time.time()
            if now > t0 + 5:
                # stay quiet during the first 5 seconds, report afterwards
                if gap > 0:
                    print("+%d" % gap)
                print(path)
            else:
                gap += 1
            time.sleep(min(1.0, now - t1))  # be nice
            # restart the measurement after the pause, so that the pause
            # itself is not counted as work of the next removal
            t1 = time.time()


def main(argv=None):
    """Run the script with the given arguments (default: ``sys.argv[1:]``).

    :raises ValueError: on a year outside 2007..2030 or an instrument which
        has no directory on this machine
    """
    if argv is None:
        argv = sys.argv[1:]
    action = COMPRESS
    years = []
    instruments = []
    variables = []
    cleanup = True
    addto = instruments  # list collecting the non-option arguments
    for opt in argv:
        if opt == '-x':
            action = EXPAND
            cleanup = False
        elif opt == '-c':
            cleanup = False
        elif opt == '-n':
            action = None
        elif opt == '-i':
            addto = instruments
        elif opt == '-p':
            years = 'past'
        elif opt == '-a':
            years = 'all'
        elif opt == '-v':
            addto = variables
        elif opt == '-y':
            if not isinstance(years, list):
                # a preceding -p / -a is overridden by explicit years
                years = []
            addto = years
        else:
            addto.append(opt)
    if not years:
        years = 'past'
    if years not in ('past', 'all'):
        for year in years:
            if not 2007 <= int(year) <= 2030:
                raise ValueError('%s is not a valid year' % year)
    if not instruments:
        instruments = ['']
    for instrument in instruments:
        if not os.path.isdir(instdir(instrument)):
            raise ValueError('%s is not an instrument on this machine' % instrument)
    print("instruments: %s" % ", ".join(instruments))
    print("years: %s" % repr(years))
    print("vars: %s" % ", ".join(variables))
    for instrument in instruments:
        print('instrument:', instdir(instrument))
        if cleanup:
            _cleanup(instrument)
        if action:
            treat(action, instrument, years, variables)


if __name__ == '__main__':
    main()