# Python script (executable file): 53 lines, 1.9 KiB.
import argparse
|
|
import re
|
|
|
|
def extract_data_from_chunks( input_file, output_file, name ):
    """Write one line per indexed chunk found in a chunked text file.

    The input file is read in full and split into the spans delimited by
    ``----- Begin chunk -----`` and ``----- End chunk -----``.  A chunk
    produces an output line only when all of the following hold:

    * it contains an ``indexed_by = <value>`` entry whose stripped value is
      not ``none``;
    * it contains an ``Image filename: <path>.h5`` entry;
    * it contains an ``Event: //<digits>`` entry.

    Each output line has the form ``<image filename> <event> <name>``.
    Progress messages are printed to standard output along the way.

    Args:
        input_file: Path of the text file to scan.
        output_file: Path of the file to write; opened with mode ``'w'``.
        name: Dataset label appended to every output line.
    """
    # Patterns are compiled once up front; the pattern text is what defines
    # which chunks and fields are recognised.
    chunk_pattern = re.compile(r'----- Begin chunk -----.*?----- End chunk -----', re.DOTALL)
    indexed_by_pattern = re.compile(r'indexed_by = (.+)')
    filename_pattern = re.compile(r'Image filename: (.+\.h5)')
    event_pattern = re.compile(r'Event: (//\d+)')

    print( "reading input file" )
    with open(input_file, 'r') as source:
        text = source.read()
    print( "done" )

    print( "finding chunks" )
    blocks = chunk_pattern.findall(text)
    print( "done. {0} found".format( len( blocks ) ) )

    print( "cycle through chunks" )
    with open(output_file, 'w') as sink:
        for block in blocks:
            # Skip chunks that carry no indexing entry at all.
            indexing = indexed_by_pattern.search(block)
            if indexing is None:
                continue

            # Skip chunks explicitly marked as not indexed.
            if indexing.group(1).strip() == 'none':
                continue

            # Both the image filename and the event id are required.
            filename_hit = filename_pattern.search(block)
            event_hit = event_pattern.search(block)
            if filename_hit is None or event_hit is None:
                continue

            sink.write(f"{filename_hit.group(1)} {event_hit.group(1)} {name}\n")
    print( "done" )
|
|
|
|
if __name__ == "__main__":
    # Command-line entry point: every option below is mandatory.
    cli = argparse.ArgumentParser(description="Extract data from chunks in a text file.")

    # (short flag, long flag, help text) for each required option, in the
    # order they are registered with the parser.
    option_specs = (
        ("-i", "--input", "Input file path"),
        ("-o", "--output", "Output file path"),
        ("-n", "--name", "name of dataset"),
    )
    for short_flag, long_flag, help_text in option_specs:
        cli.add_argument(short_flag, long_flag, help=help_text, required=True)

    options = cli.parse_args()

    extract_data_from_chunks( options.input, options.output, options.name )
|
|
|