add new compress script lz4.py

This commit is contained in:
2026-02-09 14:33:40 +01:00
parent 30fcb9fda9
commit 3648b83db8
2 changed files with 117 additions and 8 deletions

View File

@@ -26,21 +26,21 @@ def zcat_compress(gzfile, files):
def zcat_expand(gzfile):
with gzip.open(gzfile) as f:
line = f.readline()
assert(line.startswith('ZCAT'))
assert(line.startswith(b'ZCAT'))
line = f.readline()
n = 0
m = 0
while line != '':
size, filename = line[:-1].split(' ',1)
while line != b'':
size, filename = line[:-1].split(b' ',1)
content = f.read(int(size))
try:
with open(filename) as g:
with open(filename, 'rb') as g:
c = g.read()
if content != c:
print('%s does not match' % filename)
m += 1
except IOError:
with open(filename, 'w') as g:
with open(filename, 'wb') as g:
g.write(content)
n += 1
line = f.readline()
@@ -51,7 +51,7 @@ def instdir(instrument=''):
if instrument != '':
instrument += '/'
return '%s/sea/%s' % (os.path.expanduser("~"), instrument)
def compress(logdir, gzdir, variable):
os.chdir(logdir + variable)
if not os.path.isdir(gzdir):
@@ -92,7 +92,10 @@ def treat(action, instrument='', years='past', vars=None):
for year in yrs:
logdir = logbase + year + '/'
gzdir = gzbase + year + '/'
os.chdir(logdir)
if action == COMPRESS:
os.chdir(logdir)
else:
os.chdir(gzdir)
if not vars:
varlist = list(glob('*'))
else:
@@ -105,7 +108,9 @@ def treat(action, instrument='', years='past', vars=None):
compress(logdir, gzdir, variable)
time.sleep(min(1.0, time.time()-t0)) # be nice
elif action == EXPAND:
os.makedirs(logdir+variable)
if variable.endswith('.gz'):
variable = variable[:-3]
os.makedirs(logdir+variable, exist_ok=True)
print(gzdir, variable, '.gz')
if os.path.isfile(gzdir + variable + '.gz'):
expand(logdir, gzdir, variable)

104
compress_logger/lz4.py Normal file
View File

@@ -0,0 +1,104 @@
from glob import glob
import re
import os
import sys
import shutil
import time
def instdir(instrument=''):
    """Return the sea directory for an instrument, ending with a slash.

    The result is ``<home>/sea/<instrument>/``; with an empty instrument
    name it is just ``<home>/sea/``.
    """
    subdir = instrument + '/' if instrument != '' else instrument
    return '%s/sea/%s' % (os.path.expanduser("~"), subdir)
# Action codes chosen on the command line: COMPRESS is the default,
# EXPAND is selected with the -x option.
COMPRESS, EXPAND = 1, 2
def _run_pipeline(producer_cmd, consumer_cmd):
    """Run ``producer_cmd | consumer_cmd`` without a shell.

    Both arguments are argv lists. Returns True only when BOTH processes
    exit with status 0 (a shell pipeline would report the last one only).
    A command that cannot be started is reported and counts as a failure.
    """
    import subprocess  # local import: the module header does not import it

    try:
        with subprocess.Popen(producer_cmd, stdout=subprocess.PIPE) as producer:
            with subprocess.Popen(consumer_cmd, stdin=producer.stdout) as consumer:
                # Drop our copy of the pipe's read end so the producer gets
                # a broken pipe instead of blocking if the consumer dies.
                producer.stdout.close()
                consumer.wait()
            producer.wait()
    except OSError as exc:
        print('cannot run', ' '.join(producer_cmd), '|', ' '.join(consumer_cmd), '-', exc)
        return False
    return producer.returncode == 0 and consumer.returncode == 0


def _compress_year(year, cyear):
    """Pack directory `year` into archive `cyear`, then move `year` to trash/."""
    if not os.path.exists(year):
        print('no', year)
        return
    if os.path.exists(cyear):
        print(cyear, 'exists already')
    elif _run_pipeline(['tar', 'cf', '-', year], ['lz4', '-', cyear]):
        print(cyear, 'created')
    else:
        # A truncated archive would be taken for a good one on the next run
        # and the data directory would then be trashed: remove it.
        if os.path.exists(cyear):
            os.remove(cyear)
        print(cyear, 'failed,', year, 'kept')
        return
    trashed = f'trash/{year}'
    if os.path.exists(trashed):
        shutil.rmtree(trashed)
    os.rename(year, trashed)


def _expand_year(year, cyear):
    """Unpack archive `cyear` into directory `year`, then move `cyear` to trash/."""
    if not os.path.exists(cyear):
        return
    if os.path.exists(year):
        print(year, 'exists already')
    elif _run_pipeline(['lz4', '-d', cyear], ['tar', 'xf', '-']):
        print(year, 'created')
    else:
        # `year` did not exist before this attempt, so whatever is there now
        # is a partial extraction; remove it so a later run retries.
        shutil.rmtree(year, ignore_errors=True)
        print(year, 'failed,', cyear, 'kept')
        return
    os.rename(cyear, f'trash/{cyear}')


def treat(instrument, yearlimit):
    """Compress or expand the yearly logger directories of one instrument.

    Changes the working directory to ``<instdir>logger/`` and creates a
    ``trash`` subdirectory there. For each year from 2007 to 2039:

    * years below `yearlimit` are packed into ``<year>.tar.lz4`` and the
      directory is moved to ``trash/<year>`` (replacing an older trash copy);
    * all other years are unpacked from ``<year>.tar.lz4`` if that archive
      exists, and the archive is moved to ``trash/``.

    Nothing is moved to trash when the tar/lz4 pipeline fails.

    `yearlimit` is compared as a string against the 4-digit year, so it
    should itself be a 4-digit year string.
    """
    logbase = '%slogger/' % instdir(instrument)
    os.chdir(logbase)
    print('chdir', logbase)
    os.makedirs('trash', exist_ok=True)
    for ynum in range(2007, 2040):
        year = str(ynum)
        cyear = f'{year}.tar.lz4'
        if year < yearlimit:
            _compress_year(year, cyear)
        else:
            _expand_year(year, cyear)
if __name__ == '__main__':
    # Command line: [-x] [-c] [<year limit>] [<instrument> ...]
    #   -x   select EXPAND and skip the cleanup of unneeded directories
    #   -c   skip the cleanup only
    #   any other argument starting with '2' is the year limit,
    #   everything else is an instrument name.
    # NOTE: `action` is recorded here but not passed to treat(); treat()
    # decides per year from the year limit alone.
    action = COMPRESS
    year = time.strftime("%Y")
    instruments = []
    cleanup = True
    for opt in sys.argv[1:]:
        if opt == '-x':
            action = EXPAND
            cleanup = False
        elif opt == '-c':
            cleanup = False
        elif opt.startswith('2'):
            year = opt
        else:
            instruments.append(opt)
    if not instruments:
        # no instrument given: work directly in ~/sea/
        instruments = ['']
    # validate all instruments before touching anything
    for instrument in instruments:
        if not os.path.isdir(instdir(instrument)):
            raise ValueError('%s is not an instrument on this machine' % instrument)
    # glob patterns (relative to logger/<year>/) of directories to delete
    removedirs = (
        'device.force_status_save',
        'nv.autoflow.*buf',
        'nv.autoflow.*min',
        'nv.autoflow.*max',
        'table.val_*',
        '*.*.stddev',
        'device.ccu4version',
        'cc.f',
        'cc.nfb',
        'cc.hfb',
        'graph_*',
    )
    print("instruments: %s" % ", ".join(instruments))
    print("compress years before %s" % year)
    for instrument in instruments:
        print('instrument:', instdir(instrument))
        if cleanup:
            for pat in removedirs:
                print('remove', pat)
                skipped = 0
                last_report = last_done = time.time()
                for path in glob(instdir(instrument)+'logger/*/'+pat):
                    shutil.rmtree(path)
                    now = time.time()
                    if now > last_report + 5:
                        # progress line at most every 5 seconds, preceded by
                        # the number of directories removed silently
                        if skipped > 0:
                            print("+%d" % skipped)
                        print(path)
                        skipped = 0
                        last_report = now
                    else:
                        skipped += 1
                    # be nice: pause as long as the removal took, at most 1 s
                    time.sleep(min(1.0, now - last_done))
                    # sample after the sleep so pauses do not add up
                    last_done = time.time()
        treat(instrument, year)