From 1acbd2f758df2cd1de616a99a59c1a968b05d8aa Mon Sep 17 00:00:00 2001 From: Florez Ospina Juan Felipe Date: Fri, 2 Aug 2024 14:37:06 +0200 Subject: [PATCH] Modified reader to output table_preamble as a dataset as opposed to attributes of the file. I believe this is better for readability of the metadata given that those preambles can sometimes contain large amounts of text. --- src/g5505_file_reader.py | 31 +++++++++++++++++-------------- 1 file changed, 17 insertions(+), 14 deletions(-) diff --git a/src/g5505_file_reader.py b/src/g5505_file_reader.py index 7e43c27..d5065c6 100644 --- a/src/g5505_file_reader.py +++ b/src/g5505_file_reader.py @@ -186,16 +186,7 @@ def read_txt_files_as_dict(filename : str , work_with_copy : bool = True ): #line = ' '.join(list_of_substrings) table_preamble.append(' '.join([item for item in list_of_substrings]))# += new_line - # Represent string values as fixed length strings in the HDF5 file, which need - # to be decoded as string when we read them. It provides better control than variable strings, - # at the expense of flexibility. - # https://docs.h5py.org/en/stable/strings.html - - if table_preamble: - header_dict["table_preamble"] = utils.convert_string_to_bytes(table_preamble) - - - + # TODO: it does not work with separator as none :(. fix for RGA try: df = pd.read_csv(tmp_filename, @@ -290,6 +281,22 @@ def read_txt_files_as_dict(filename : str , work_with_copy : bool = True ): #except ValueError as err: # print(err) + # Represent string values as fixed length strings in the HDF5 file, which need + # to be decoded as string when we read them. It provides better control than variable strings, + # at the expense of flexibility. 
+ # https://docs.h5py.org/en/stable/strings.html + + + if table_preamble: + #header_dict["table_preamble"] = utils.convert_string_to_bytes(table_preamble) + tp_dataset = {} + tp_dataset['name'] = "table_preamble" + tp_dataset['data'] = utils.convert_string_to_bytes(table_preamble) + tp_dataset['shape'] = tp_dataset['data'].shape + tp_dataset['dtype'] = type(tp_dataset['data']) + tp_dataset['attributes'] = {} + file_dict['datasets'].append(tp_dataset) + file_dict['datasets'].append(dataset) @@ -302,10 +309,6 @@ def read_txt_files_as_dict(filename : str , work_with_copy : bool = True ): # if timestamps_name in categorical_variables: # dataset['attributes'] = {timestamps_name: utils.parse_attribute({'unit':'YYYY-MM-DD HH:MM:SS.ffffff'})} # file_dict['datasets'].append(dataset) - - - - except: return {}