From 1acbd2f758df2cd1de616a99a59c1a968b05d8aa Mon Sep 17 00:00:00 2001
From: Florez Ospina Juan Felipe <juan.florez-ospina@psi.ch>
Date: Fri, 2 Aug 2024 14:37:06 +0200
Subject: [PATCH] Modified reader to output table_preamble as a dataset as
 opposed to attributes of the file. I believe this is better for readability
 of the metadata given that those preambles can sometimes contain large
 amounts of text.

---
 src/g5505_file_reader.py | 31 +++++++++++++++++--------------
 1 file changed, 17 insertions(+), 14 deletions(-)

diff --git a/src/g5505_file_reader.py b/src/g5505_file_reader.py
index 7e43c27..d5065c6 100644
--- a/src/g5505_file_reader.py
+++ b/src/g5505_file_reader.py
@@ -186,16 +186,7 @@ def read_txt_files_as_dict(filename : str , work_with_copy : bool = True ):
             #line = ' '.join(list_of_substrings)     
             table_preamble.append(' '.join([item for item in list_of_substrings]))# += new_line  
 
-    # Represent string values as fixed length strings in the HDF5 file, which need
-    # to be decoded as string when we read them. It provides better control than variable strings,
-    # at the expense of flexibility.
-    # https://docs.h5py.org/en/stable/strings.html
-
-    if table_preamble:
-        header_dict["table_preamble"] = utils.convert_string_to_bytes(table_preamble)   
-       
-
-   
+  
     # TODO: it does not work with separator as none :(. fix for RGA
     try:
         df = pd.read_csv(tmp_filename, 
@@ -290,6 +281,22 @@ def read_txt_files_as_dict(filename : str , work_with_copy : bool = True ):
         #except ValueError as err:
         #    print(err)
 
+    # Represent string values as fixed length strings in the HDF5 file, which need
+    # to be decoded as string when we read them. It provides better control than variable strings,
+    # at the expense of flexibility.
+    # https://docs.h5py.org/en/stable/strings.html
+
+
+        if table_preamble:        
+            #header_dict["table_preamble"] = utils.convert_string_to_bytes(table_preamble)  
+            tp_dataset = {}
+            tp_dataset['name'] = "table_preamble"
+            tp_dataset['data'] = utils.convert_string_to_bytes(table_preamble) 
+            tp_dataset['shape'] = tp_dataset['data'].shape
+            tp_dataset['dtype'] = type(tp_dataset['data'])
+            tp_dataset['attributes'] = {}
+            file_dict['datasets'].append(tp_dataset)
+
         file_dict['datasets'].append(dataset)
 
        
@@ -302,10 +309,6 @@ def read_txt_files_as_dict(filename : str , work_with_copy : bool = True ):
         #    if timestamps_name in categorical_variables:
         #        dataset['attributes'] = {timestamps_name: utils.parse_attribute({'unit':'YYYY-MM-DD HH:MM:SS.ffffff'})}
         #    file_dict['datasets'].append(dataset) 
- 
-
-
-
     except:
         return {}