From c7051bfe69a9bc54839631fdcae53a68a07b4105 Mon Sep 17 00:00:00 2001 From: Florez Ospina Juan Felipe Date: Fri, 24 May 2024 15:55:15 +0200 Subject: [PATCH] updated readme and reader to handle ignore ascii character errors --- README.md | 20 +++++++++++++++++--- src/g5505_file_reader.py | 6 +++++- 2 files changed, 22 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 13a7b99..5a9a77b 100644 --- a/README.md +++ b/README.md @@ -20,11 +20,25 @@ Follow these steps to install and set up the project: ``` conda env create -f environment.yml ``` -4. Activate the created environment by running the appropriate command based on your preferred Python IDE or terminal: +4. Activate the created environment by running the following command: + ``` + conda activate multiphase_chemistry_env + ``` +5. Once the environment is activated, register the associated kernel in Jupyter by running: - * Jupyter Notebook/Lab: When starting a new notebook, select the `multiphase_chemistry_env` environment from the kernel options. + ``` + python -m ipykernel install --user --name multiphase_chemistry_env --display-name "Python (multiphase_chemistry_env)" + ``` - * Visual Studio Code (VS Code): After opening your project in VS Code, click on the Python interpreter in the status bar and choose the `multiphase_chemistry_env` environment. + +6. Start a Jupyter Notebook by running the command: + ``` + jupyter notebook + ``` + and select the `multiphase_chemistry_env` environment from the kernel options. + + +7. Otherwise, for Visual Studio Code (VS Code), after opening your project in VS Code, click on the Python interpreter in the status bar and choose the `multiphase_chemistry_env` environment. 
## Data integration workflow diff --git a/src/g5505_file_reader.py b/src/g5505_file_reader.py index d0f4b17..12bc623 100644 --- a/src/g5505_file_reader.py +++ b/src/g5505_file_reader.py @@ -175,8 +175,12 @@ def read_txt_files_as_dict(filename : str ): if table_preamble: max_length = max(len(item) for item in table_preamble) + # Convert the strings to bytes with utf-8 encoding, specifying errors='ignore' to skip characters that cannot be encoded + table_preamble_bytes = [item.encode('utf-8', errors='ignore') for item in table_preamble] + + utf8_type = h5py.string_dtype('utf-8', max_length) - header_dict["table_preamble"] = np.array(table_preamble,dtype=utf8_type) + header_dict["table_preamble"] = np.array(table_preamble_bytes,dtype=utf8_type) # TODO: it does not work with separator as none :(. fix for RGA