From 1bf1f60bebdd98653af35d570cdb17eb0497e756 Mon Sep 17 00:00:00 2001
From: Florez Ospina Juan Felipe <juan.florez-ospina@psi.ch>
Date: Fri, 22 Mar 2024 17:28:47 +0100
Subject: [PATCH] Added lines to treat string attributes as fixed-length
 strings, which are represented as bytes that need to be decoded with UTF-8.
 There are a few advantages, and HDF5 readers provide more precise behavior
 with fixed-length strings than with variable-length strings

---
 src/hdf5_lib.py | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/src/hdf5_lib.py b/src/hdf5_lib.py
index cbab913..ad41383 100644
--- a/src/hdf5_lib.py
+++ b/src/hdf5_lib.py
@@ -317,11 +317,21 @@ def create_hdf5_file_from_filesystem_path(ofilename : str,
                         if not file_dict:
                             continue
 
-                                    # file_dict = file_obj
                         # Create group and add their attributes
                         h5file[group_name].create_group(name=file_dict['name'])
                         for key in file_dict['attributes_dict'].keys():
-                            h5file[group_name][file_dict['name']].attrs.create(name=key,data=file_dict['attributes_dict'][key])
+                            
+                            # Represent string values as fixed-length strings in the HDF5 file; these must be
+                            # decoded back to strings when we read them. This provides better control than
+                            # variable-length strings, at the expense of flexibility.
+                            # https://docs.h5py.org/en/stable/strings.html
+                            value = file_dict['attributes_dict'][key]
+                            if isinstance(value,str):
+                                utf8_type = h5py.string_dtype('utf-8', len(value))
+                                value = np.array(value.encode('utf-8'),dtype=utf8_type)
+
+                            h5file[group_name][file_dict['name']].attrs.create(name=key,
+                                                                               data=value)
                             
                         # Add datasets to just created group
                         for dataset in file_dict['datasets']: