Added an option to enable or disable copying the input file before reading it. This avoids redundant copying when we are already working on copies of the files.

This commit is contained in:
2024-05-28 14:40:14 +02:00
parent e0d84d7822
commit 0f505df45c

View File

@ -62,10 +62,13 @@ def read_xps_ibw_file_as_dict(filename):
return file_dict
def copy_file_in_group(source_file_path, dest_file_obj : h5py.File, dest_group_name):
def copy_file_in_group(source_file_path, dest_file_obj : h5py.File, dest_group_name, work_with_copy : bool = True):
# Create copy of original file to avoid possible file corruption and work with it.
tmp_file_path = utils.make_file_copy(source_file_path)
if work_with_copy:
tmp_file_path = utils.make_file_copy(source_file_path)
else:
tmp_file_path = source_file_path
# Open backup h5 file and copy complet filesystem directory onto a group in h5file
with h5py.File(tmp_file_path,'r') as src_file:
@ -109,7 +112,7 @@ def dataframe_to_np_structured_array(df: pd.DataFrame):
return structured_array
def read_txt_files_as_dict(filename : str ):
def read_txt_files_as_dict(filename : str , work_with_copy : bool = True ):
with open('src/text_data_sources.yaml','r') as stream:
try:
@ -148,7 +151,10 @@ def read_txt_files_as_dict(filename : str ):
header_dict = {}
data_start = False
# Work with copy of the file for safety
tmp_filename = utils.make_file_copy(source_file_path=filename)
if work_with_copy:
tmp_filename = utils.make_file_copy(source_file_path=filename)
else:
tmp_filename = filename
#with open(tmp_filename,'rb',encoding=file_encoding,errors='ignore') as f:
with open(tmp_filename,'rb') as f: