Files
sea/compress_logger/compress.py
2025-01-17 17:05:02 +01:00

189 lines
5.8 KiB
Python

import gzip
from glob import glob
import re
import os
import sys
import shutil
import time
def zcat_compress(gzfile, files):
    """Pack sea log files into one gzip-compressed "ZCAT" archive.

    The archive starts with the magic bytes ``ZCAT``. Every accepted file is
    appended as a header (a newline, the content size in bytes, a space, the
    file name, a newline) followed by the raw file content.

    A file whose content does not start with a ``#20YY-MM-DD `` date header
    is not archived; it is renamed to ``<filename>.bad`` instead.

    The data is written to ``gzfile + 'z'`` first and renamed to *gzfile*
    only after the archive has been written and closed, so an interrupted
    run does not leave a truncated file under the final name.

    :param gzfile: path of the archive to create
    :param files: iterable of paths of the log files to pack
    """
    tmpfile = gzfile + 'z'
    n = 0  # number of files actually archived
    # Binary mode throughout: the size in each header is a byte count and
    # the content is copied verbatim, independent of any text encoding.
    with gzip.open(tmpfile, 'wb') as out:
        out.write(b'ZCAT')
        for filename in files:
            with open(filename, 'rb') as f:
                content = f.read()
            if re.match(br'#20\d\d-\d\d-\d\d ', content) is None:
                print('%s is not a sea log file' % filename)
                os.rename(filename, filename + ".bad")
            else:
                header = "\n%d %s\n" % (len(content), filename)
                out.write(header.encode('utf-8'))
                out.write(content)
                n += 1
    os.rename(tmpfile, gzfile)
    print("%d files compressed to %s" % (n, gzfile))
def zcat_expand(gzfile):
    """Unpack a "ZCAT" archive into the files named inside it.

    The archive must start with a line beginning with ``ZCAT``. Each entry
    is a header line ``<size> <filename>`` followed by ``<size>`` bytes of
    content and a separating newline.

    File names are used exactly as stored, so relative names are resolved
    against the current working directory. A file that can already be read
    is never overwritten: its content is compared with the archived content
    and a message is printed when they differ.

    :param gzfile: path of the archive to unpack
    :raises ValueError: if the file does not start with the ZCAT magic
    """
    n = 0  # files newly written
    m = 0  # files that were already present
    # Binary mode: header sizes are byte counts, content is copied verbatim.
    with gzip.open(gzfile, 'rb') as f:
        magic = f.readline()
        if not magic.startswith(b'ZCAT'):
            raise ValueError('%s is not a ZCAT archive' % gzfile)
        line = f.readline()
        while line:
            size, filename = line[:-1].split(b' ', 1)
            filename = filename.decode('utf-8')
            content = f.read(int(size))
            try:
                with open(filename, 'rb') as g:
                    existing = g.read()
            except IOError:
                # not readable (normally: not there yet), so create it
                with open(filename, 'wb') as g:
                    g.write(content)
                n += 1
            else:
                if content != existing:
                    print('%s does not match' % filename)
                m += 1
            f.readline()  # skip the newline written in front of each header
            line = f.readline()
    print("%s extracted from %s, (%d files already there)" % (n,gzfile,m))
def instdir(instrument=''):
    """Return the sea directory of an instrument, with a trailing slash.

    With the default empty *instrument* this is ``~/sea/``; otherwise it is
    ``~/sea/<instrument>/`` (``~`` expanded to the user's home directory).
    """
    home = os.path.expanduser("~")
    subdir = instrument + '/' if instrument != '' else instrument
    return '%s/sea/%s' % (home, subdir)
def compress(logdir, gzdir, variable):
    """Archive the ``*.log`` files of one variable into ``<gzdir><variable>.gz``.

    Changes the working directory to ``logdir + variable`` and creates
    *gzdir* if needed. When the archive already exists it is expanded first,
    so the files it holds take part in the new archive. Nothing is written
    when no ``*.log`` file is found.

    :param logdir: directory holding the variable directories (trailing slash)
    :param gzdir: directory receiving the archives (trailing slash)
    :param variable: name of the variable directory inside *logdir*
    """
    # the glob below yields names relative to the variable's directory
    os.chdir(logdir + variable)
    if not os.path.isdir(gzdir):
        os.makedirs(gzdir)
    archive = '%s%s.gz' % (gzdir, variable)
    if os.path.isfile(archive):
        # unpack the existing archive first, so that its files are not lost
        expand(logdir, gzdir, variable)
    logfiles = glob('*.log')
    logfiles.sort()
    if not logfiles:
        return
    zcat_compress(archive, logfiles)
def expand(logdir, gzdir, variable):
    """Unpack ``<gzdir><variable>.gz`` into the directory ``logdir + variable``.

    The target directory (including missing parents) is created when it does
    not exist, then made the working directory before the archive is
    unpacked.

    :param logdir: directory holding the variable directories (trailing slash)
    :param gzdir: directory holding the archives (trailing slash)
    :param variable: name of the variable, i.e. of the archive and directory
    """
    vardir = logdir + variable
    # Create the directory that is actually entered below; ensuring only
    # logdir would still let chdir fail on a missing variable directory.
    if not os.path.isdir(vardir):
        os.makedirs(vardir)
    os.chdir(vardir)
    gzfile = '%s%s.gz' % (gzdir, variable)
    zcat_expand(gzfile)
# Action codes understood by treat(). Both are truthy, so "no action" can be
# expressed as None.
COMPRESS, EXPAND = 1, 2
def treat(action, instrument='', years='past', vars=None):
    """Compress or expand the logger data of one instrument.

    Log files live in ``<instdir>logger/<year>/<variable>/`` and archives in
    ``<instdir>gzlogger/<year>/<variable>.gz``. The working directory is
    changed while running.

    :param action: COMPRESS or EXPAND; any other value leaves the variables
        untouched
    :param instrument: instrument name passed to instdir()
    :param years: ``'all'`` for every ``20*`` entry in the logger directory,
        ``'past'`` for the same without the current year, or an explicit
        iterable of year strings
    :param vars: variable names to treat; when empty or None, every entry
        found in the year's log directory is used
    """
    logbase = '%slogger/' % instdir(instrument)
    gzbase = '%sgzlogger/' % instdir(instrument)
    os.chdir(logbase)
    print('chdir', logbase, gzbase)
    if years == 'all':
        yrs = glob('20*')
    elif years == 'past':
        yrs = glob('20*')
        try:
            yrs.remove(time.strftime("%Y"))
        except ValueError:
            pass  # no directory for the current year
    else:
        yrs = years
    for year in yrs:
        logdir = logbase + year + '/'
        gzdir = gzbase + year + '/'
        if action == EXPAND and not os.path.isdir(logdir):
            # expanding may have to recreate the log directory of a year
            os.makedirs(logdir)
        os.chdir(logdir)
        if not vars:
            varlist = glob('*')
        else:
            varlist = vars
        for variable in varlist:
            gzfile = gzdir + variable + '.gz'
            if action == COMPRESS:
                # only directories that have no archive yet are compressed
                if os.path.isdir(logdir + variable) and not os.path.isfile(gzfile):
                    t0 = time.time()
                    compress(logdir, gzdir, variable)
                    time.sleep(min(1.0, time.time()-t0)) # be nice
            elif action == EXPAND:
                # The directory usually exists already (always, when the
                # variable list comes from the glob above), so create it
                # only when it is missing instead of failing.
                if not os.path.isdir(logdir + variable):
                    os.makedirs(logdir + variable)
                print(gzdir, variable, '.gz')
                if os.path.isfile(gzfile):
                    expand(logdir, gzdir, variable)
                else:
                    print(gzfile, 'is missing')
if __name__ == '__main__':
    # Command line: options and names may be mixed freely.
    #   -x  expand instead of compress (implies no cleanup)
    #   -c  skip the cleanup of unwanted directories
    #   -n  no compress/expand action (cleanup only, unless -c is given)
    #   -i  the following names are instruments (this is the default)
    #   -v  the following names are variables
    #   -y  the following names are years
    #   -p  treat all past years (default when no year is given)
    #   -a  treat all years
    action = COMPRESS
    years = []
    instruments = []
    variables = []
    cleanup = True
    addto = instruments
    for opt in sys.argv[1:]:
        if opt == '-x':
            action = EXPAND
            cleanup = False
        elif opt == '-c':
            cleanup = False
        elif opt == '-n':
            action = None
        elif opt == '-i':
            addto = instruments
        elif opt == '-p':
            years = 'past'
        elif opt == '-a':
            years = 'all'
        elif opt == '-v':
            addto = variables
        elif opt == '-y':
            # a preceding -p / -a replaced the list by a string; start a
            # fresh list so that the following years can be appended
            if not isinstance(years, list):
                years = []
            addto = years
        else:
            addto.append(opt)
    if not years:
        years = 'past'
    if years not in ('past', 'all'):
        for year in years:
            # explicit check instead of assert, which is dropped under -O
            if not 2007 <= int(year) <= 2030:
                raise ValueError('year %s is out of range' % year)
    if not instruments:
        instruments = ['']
    for instrument in instruments:
        if not os.path.isdir(instdir(instrument)):
            raise ValueError('%s is not an instrument on this machine' % instrument)
    # glob patterns (relative to <instdir>logger/<year>/) of directories
    # that are deleted during cleanup
    removedirs = (
        'device.force_status_save',
        'nv.autoflow.*buf',
        'nv.autoflow.*min',
        'nv.autoflow.*max',
        'table.val_*',
        '*.*.stddev',
        'device.ccu4version',
        'cc.f',
        'cc.nfb',
        'cc.hfb',
        'graph_*',
    )
    print("instruments: %s" % ", ".join(instruments))
    print("years: %s" % repr(years))
    print("vars: %s" % ", ".join(variables))
    for instrument in instruments:
        print('instrument:', instdir(instrument))
        if cleanup:
            for pat in removedirs:
                print('remove', pat)
                gap = 0
                t0 = time.time()
                t1 = t0
                for path in glob(instdir(instrument)+'logger/*/'+pat):
                    shutil.rmtree(path)
                    now = time.time()
                    # progress output starts once a pattern takes more than
                    # 5 seconds; gap counts the removals done before that
                    # NOTE(review): t0 and gap are never reset, so after the
                    # first 5 seconds every directory is printed -- confirm
                    # whether a periodic report was intended
                    if now > t0 + 5:
                        if gap > 0:
                            print("+%d" % gap)
                        print(path)
                    else:
                        gap += 1
                    time.sleep(min(1.0, now - t1)) # be nice
                    t1 = now
        if action:
            treat(action, instrument, years, variables)