import argparse
import re
from typing import Iterable, Iterator, Optional

# Literal delimiters that open and close one chunk in the input text.
_BEGIN_MARKER = "----- Begin chunk -----"
_END_MARKER = "----- End chunk -----"

# Compiled once; each pattern is applied to every chunk.
_INDEXED_BY_RE = re.compile(r'indexed_by = (.+)')
_IMAGE_FILENAME_RE = re.compile(r'Image filename: (.+\.h5)')
_EVENT_RE = re.compile(r'Event: (//\d+)')


def _iter_chunks(lines: Iterable[str]) -> Iterator[str]:
    """Yield each chunk, from its begin marker through the next end marker.

    Only one chunk is held in memory at a time. A chunk that is opened but
    never closed before the input ends is discarded, and begin markers seen
    while a chunk is already open are treated as ordinary chunk text.
    """
    pieces = None  # text collected for the currently open chunk, else None
    for line in lines:
        pos = 0
        while True:
            if pieces is None:
                begin = line.find(_BEGIN_MARKER, pos)
                if begin == -1:
                    break  # nothing opens on the rest of this line
                pieces = []
                piece_start = begin
                # The end marker may only start after the begin marker ends.
                search_from = begin + len(_BEGIN_MARKER)
            else:
                piece_start = pos
                search_from = pos
            end = line.find(_END_MARKER, search_from)
            if end == -1:
                # Chunk continues on the following line(s).
                pieces.append(line[piece_start:])
                break
            end += len(_END_MARKER)
            pieces.append(line[piece_start:end])
            yield "".join(pieces)
            pieces = None
            pos = end  # keep scanning: another chunk may open on this line


def _format_record(chunk: str, name: str) -> Optional[str]:
    """Return the output line for *chunk*, or None if it must be skipped.

    A chunk is skipped when it has no ``indexed_by = ...`` entry, when that
    entry (stripped) equals ``none``, or when either the image filename or
    the event entry is missing. The first occurrence of each entry is used.
    """
    indexed_by = _INDEXED_BY_RE.search(chunk)
    if indexed_by is None or indexed_by.group(1).strip() == 'none':
        return None

    image_filename = _IMAGE_FILENAME_RE.search(chunk)
    event = _EVENT_RE.search(chunk)
    if image_filename is None or event is None:
        return None

    return f"{image_filename.group(1)} {event.group(1)} {name}\n"


def extract_data_from_chunks(input_file, output_file, name):
    """Write one ``<image filename> <event> <name>`` line per indexed chunk.

    The input is scanned line by line, so memory use is bounded by the size
    of a single chunk rather than by the size of the whole file.

    Args:
        input_file: Path of the text file containing the chunks
            (presumably a CrystFEL stream file -- confirm with the caller).
        output_file: Path of the list file to create or overwrite. It must
            differ from ``input_file``, because the output is written while
            the input is still being read.
        name: Label appended as the last column of every output line.
    """
    print("cycle through chunks")
    chunk_count = 0
    # The input is opened first, so a missing input file raises before the
    # output file is created or truncated.
    with open(input_file, 'r') as in_file, open(output_file, 'w') as out_file:
        for chunk in _iter_chunks(in_file):
            chunk_count += 1
            record = _format_record(chunk, name)
            if record is not None:
                out_file.write(record)
    print("done. {0} found".format(chunk_count))


if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="Extract data from chunks in a text file."
    )
    parser.add_argument(
        "-i",
        "--input",
        help="Input file path",
        required=True,
    )
    parser.add_argument(
        "-o",
        "--output",
        help="Output file path",
        required=True,
    )
    parser.add_argument(
        "-n",
        "--name",
        help="name of dataset",
        required=True,
    )
    args = parser.parse_args()

    extract_data_from_chunks(args.input, args.output, args.name)