From c7051bfe69a9bc54839631fdcae53a68a07b4105 Mon Sep 17 00:00:00 2001
From: Florez Ospina Juan Felipe <juan.florez-ospina@psi.ch>
Date: Fri, 24 May 2024 15:55:15 +0200
Subject: [PATCH] updated readme and reader to handle ignore ascii character
 errors

---
 README.md                | 20 +++++++++++++++++---
 src/g5505_file_reader.py |  6 +++++-
 2 files changed, 22 insertions(+), 4 deletions(-)

diff --git a/README.md b/README.md
index 13a7b99..5a9a77b 100644
--- a/README.md
+++ b/README.md
@@ -20,11 +20,25 @@ Follow these steps to install and set up the project:
   ```
   conda env create -f environment.yml
   ```
-4. Activate the created environment by running the appropriate command based on your preferred Python IDE or terminal:
+4. Activate the created environment by running the following command:
+  ```
+  conda activate multiphase_chemistry_env
+  ```
+5. Once the environment is activated, register the associated kernel in Jupyter by running:
 
-  * Jupyter Notebook/Lab: When starting a new notebook, select the `multiphase_chemistry_env` environment from the kernel options.
+  ```
+  python -m ipykernel install --user --name multiphase_chemistry_env --display-name "Python (multiphase_chemistry_env)"  
+  ```
 
-  * Visual Studio Code (VS Code): After opening your project in VS Code, click on the Python interpreter in the status bar and choose the `multiphase_chemistry_env` environment.
+
+6. Start a Jupyter Notebook by running the command:
+  ```
+  jupyter notebook
+  ```
+  and select the `multiphase_chemistry_env` environment from the kernel options.
+
+
+7. Alternatively, if you use Visual Studio Code (VS Code), open your project in VS Code, click on the Python interpreter in the status bar, and choose the `multiphase_chemistry_env` environment.
 
 ## Data integration workflow
 
diff --git a/src/g5505_file_reader.py b/src/g5505_file_reader.py
index d0f4b17..12bc623 100644
--- a/src/g5505_file_reader.py
+++ b/src/g5505_file_reader.py
@@ -175,8 +175,12 @@ def read_txt_files_as_dict(filename : str ):
 
     if table_preamble:
         max_length = max(len(item) for item in table_preamble)
+        # Convert the strings to bytes with utf-8 encoding, specifying errors='ignore' to skip characters that cannot be encoded
+        table_preamble_bytes = [item.encode('utf-8', errors='ignore') for item in table_preamble]
+    
+
         utf8_type = h5py.string_dtype('utf-8', max_length)
-        header_dict["table_preamble"] = np.array(table_preamble,dtype=utf8_type) 
+        header_dict["table_preamble"] = np.array(table_preamble_bytes,dtype=utf8_type) 
 
    
     # TODO: it does not work with separator as none :(. fix for RGA