Added a `work_with_copy` option to enable or disable copying the input file before reading it. This avoids redundant copying when we are already working on file copies.

2024-05-28 14:40:14 +02:00
parent e0d84d7822
commit 0f505df45c
1 changed files with 10 additions and 4 deletions
--- a/src/g5505_file_reader.py
+++ b/src/g5505_file_reader.py
@ -62,10 +62,13 @@ def read_xps_ibw_file_as_dict(filename):

    return file_dict

-def copy_file_in_group(source_file_path, dest_file_obj : h5py.File, dest_group_name):
+def copy_file_in_group(source_file_path, dest_file_obj : h5py.File, dest_group_name, work_with_copy : bool = True):
    # Create copy of original file to avoid possible file corruption and work with it.

-    tmp_file_path = utils.make_file_copy(source_file_path)
+    if work_with_copy:
+        tmp_file_path = utils.make_file_copy(source_file_path)
+    else:
+        tmp_file_path = source_file_path

    # Open backup h5 file and copy complet filesystem directory onto a group in h5file
    with h5py.File(tmp_file_path,'r') as src_file:
@ -109,7 +112,7 @@ def dataframe_to_np_structured_array(df: pd.DataFrame):

    return structured_array

-def read_txt_files_as_dict(filename : str ):
+def read_txt_files_as_dict(filename : str , work_with_copy : bool = True ):

    with open('src/text_data_sources.yaml','r') as stream:
        try:
@ -148,7 +151,10 @@ def read_txt_files_as_dict(filename : str ):
    header_dict = {}
    data_start = False    
    # Work with copy of the file for safety
-    tmp_filename = utils.make_file_copy(source_file_path=filename)
+    if work_with_copy:
+        tmp_filename = utils.make_file_copy(source_file_path=filename)
+    else:
+        tmp_filename = filename

    #with open(tmp_filename,'rb',encoding=file_encoding,errors='ignore') as f:
    with open(tmp_filename,'rb') as f: