Refactored to interact with config_file.py, which sets available file readers
This commit is contained in:
@ -10,9 +10,11 @@ import plotly.express as px
|
|||||||
import plotly.graph_objects as go
|
import plotly.graph_objects as go
|
||||||
from plotly.subplots import make_subplots
|
from plotly.subplots import make_subplots
|
||||||
|
|
||||||
import g5505_file_reader
|
#import g5505_file_reader
|
||||||
import g5505_utils as utils
|
import g5505_utils as utils
|
||||||
import smog_chamber_file_reader
|
#import smog_chamber_file_reader
|
||||||
|
|
||||||
|
import config_file
|
||||||
|
|
||||||
|
|
||||||
def read_mtable_as_dataframe(filename):
|
def read_mtable_as_dataframe(filename):
|
||||||
@ -278,14 +280,23 @@ def create_hdf5_file_from_filesystem_path(ofilename : str, input_file_system_pat
|
|||||||
|
|
||||||
# Filter out files with filenames not containing a keyword specified in the parameter 'select_file_keywords'.
|
# Filter out files with filenames not containing a keyword specified in the parameter 'select_file_keywords'.
|
||||||
# When select_file_keywords is empty, i.e., [], do not apply any filter on the filenames.
|
# When select_file_keywords is empty, i.e., [], do not apply any filter on the filenames.
|
||||||
if select_file_keywords:
|
|
||||||
|
|
||||||
filtered_filename_list = []
|
filtered_filename_list = []
|
||||||
|
if select_file_keywords:
|
||||||
for filename in filenames_list:
|
for filename in filenames_list:
|
||||||
if any([date in filename for date in select_file_keywords]):
|
if any([keyword in filename for keyword in select_file_keywords]):
|
||||||
filtered_filename_list.append(filename)
|
filtered_filename_list.append(filename)
|
||||||
else:
|
else:
|
||||||
filtered_filename_list = filenames_list.copy()
|
filtered_filename_list = filenames_list.copy()
|
||||||
|
|
||||||
|
admissible_file_ext_list = list(config_file.ext_to_reader_dict.keys())
|
||||||
|
|
||||||
|
for filename in filtered_filename_list.copy():
|
||||||
|
if not any([ext in filename for ext in admissible_file_ext_list]):
|
||||||
|
filtered_filename_list.remove(filename)
|
||||||
|
|
||||||
|
|
||||||
# Skip subdirectories that do not contain a keyword in the parameter 'select_dir_keywords' when it is nonempty
|
# Skip subdirectories that do not contain a keyword in the parameter 'select_dir_keywords' when it is nonempty
|
||||||
if select_dir_keywords:
|
if select_dir_keywords:
|
||||||
if (dirpath.count(os.sep) > offset) and not any([item in dirpath for item in select_dir_keywords]):
|
if (dirpath.count(os.sep) > offset) and not any([item in dirpath for item in select_dir_keywords]):
|
||||||
@ -297,13 +308,15 @@ def create_hdf5_file_from_filesystem_path(ofilename : str, input_file_system_pat
|
|||||||
# Set root_dir to top directory path in input file system
|
# Set root_dir to top directory path in input file system
|
||||||
root_dir = group_name
|
root_dir = group_name
|
||||||
group_name = group_name.replace(root_dir,'/')
|
group_name = group_name.replace(root_dir,'/')
|
||||||
#h5file.attrs.create(name='count',data=len(filenames_list))
|
|
||||||
h5file.attrs.create(name='file_list',data=filtered_filename_list)
|
h5file.attrs.create(name='filtered_file_list',data=filtered_filename_list)
|
||||||
|
h5file.attrs.create(name='file_list',data=filenames_list)
|
||||||
else:
|
else:
|
||||||
group_name = group_name.replace(root_dir+'/','/')
|
group_name = group_name.replace(root_dir+'/','/')
|
||||||
# Group hierarchy is implicitly defined by the forward slashes
|
# Group hierarchy is implicitly defined by the forward slashes
|
||||||
h5file.create_group(group_name)
|
h5file.create_group(group_name)
|
||||||
h5file[group_name].attrs.create(name='file_list',data=filtered_filename_list)
|
h5file[group_name].attrs.create(name='filtered_file_list',data=filtered_filename_list)
|
||||||
|
h5file[group_name].attrs.create(name='file_list',data=filenames_list)
|
||||||
|
|
||||||
|
|
||||||
# TODO: for each "admissible" file in filenames, create an associated dataset in the corresponding group (subdirectory)
|
# TODO: for each "admissible" file in filenames, create an associated dataset in the corresponding group (subdirectory)
|
||||||
@ -315,8 +328,24 @@ def create_hdf5_file_from_filesystem_path(ofilename : str, input_file_system_pat
|
|||||||
|
|
||||||
for filename in filtered_filename_list:
|
for filename in filtered_filename_list:
|
||||||
|
|
||||||
|
# Get file extension (or file type)
|
||||||
|
file_name, file_ext = os.path.splitext(filename)
|
||||||
|
|
||||||
|
#try:
|
||||||
|
if not 'h5' in filename:
|
||||||
|
file_obj = config_file.ext_to_reader_dict[file_ext](os.path.join(dirpath,filename))
|
||||||
|
else:
|
||||||
|
config_file.ext_to_reader_dict[file_ext](source_file_path = os.path.join(dirpath,filename),
|
||||||
|
dest_file_obj = h5file,
|
||||||
|
dest_group_name = group_name +'/'+filename)
|
||||||
|
print(file_ext, ':)')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
if 'ibw' in filename:
|
if 'ibw' in filename:
|
||||||
file_dict = g5505_file_reader.read_xps_ibw_file_as_dict(os.path.join(dirpath,filename))
|
#file_dict = g5505_file_reader.read_xps_ibw_file_as_dict(os.path.join(dirpath,filename))
|
||||||
|
file_dict = file_obj
|
||||||
|
|
||||||
h5file[group_name].create_dataset(name = file_dict['name'],
|
h5file[group_name].create_dataset(name = file_dict['name'],
|
||||||
data = file_dict['data'],
|
data = file_dict['data'],
|
||||||
@ -328,24 +357,28 @@ def create_hdf5_file_from_filesystem_path(ofilename : str, input_file_system_pat
|
|||||||
for key in file_dict['attributes_dict'].keys():
|
for key in file_dict['attributes_dict'].keys():
|
||||||
h5file[group_name][file_dict['name']].attrs.create(name=key,data=file_dict['attributes_dict'][key])
|
h5file[group_name][file_dict['name']].attrs.create(name=key,data=file_dict['attributes_dict'][key])
|
||||||
|
|
||||||
if 'h5' in filename:
|
#if 'h5' in filename:
|
||||||
|
|
||||||
# Create copy of original file to avoid possible file corruption and work with it.
|
# Create copy of original file to avoid possible file corruption and work with it.
|
||||||
backup_filename = 'backup_'+filename
|
#backup_filename = 'backup_'+filename
|
||||||
# Path
|
# Path
|
||||||
|
|
||||||
shutil.copy(os.path.join(dirpath,filename), os.path.join(tmp_dirpath,backup_filename))
|
#shutil.copy(os.path.join(dirpath,filename), os.path.join(tmp_dirpath,backup_filename))
|
||||||
# Open backup h5 file and copy the complete filesystem directory onto a group in h5file
|
# Open backup h5 file and copy the complete filesystem directory onto a group in h5file
|
||||||
with h5py.File(os.path.join(tmp_dirpath,backup_filename),'r') as src_file:
|
#with h5py.File(os.path.join(tmp_dirpath,backup_filename),'r') as src_file:
|
||||||
h5file.copy(source=src_file['/'],dest= group_name +'/'+filename)
|
# h5file.copy(source=src_file['/'],dest= group_name +'/'+filename)
|
||||||
|
|
||||||
|
# h5file.copy(source= file_obj, dest= group_name +'/'+filename)
|
||||||
|
|
||||||
|
|
||||||
# TODO: generalize to multiphase chemistry text and dat files
|
# TODO: generalize to multiphase chemistry text and dat files
|
||||||
# TODO: include header information from files as well
|
# TODO: include header information from files as well
|
||||||
if ('txt' in filename or 'TXT' in filename) and any([item in os.path.join(dirpath,filename) for item in ['smps','gas']]):
|
if ('txt' in filename or 'TXT' in filename) and any([item in os.path.join(dirpath,filename) for item in ['smps','gas']]):
|
||||||
if 'smps' in os.path.join(dirpath,filename):
|
#if 'smps' in os.path.join(dirpath,filename):
|
||||||
file_dict = smog_chamber_file_reader.read_txt_files_as_dict(os.path.join(dirpath,filename),'smps')
|
# file_dict = smog_chamber_file_reader.read_txt_files_as_dict(os.path.join(dirpath,filename),'smps')
|
||||||
elif 'gas' in os.path.join(dirpath,filename):
|
#elif 'gas' in os.path.join(dirpath,filename):
|
||||||
file_dict = smog_chamber_file_reader.read_txt_files_as_dict(os.path.join(dirpath,filename),'gas')
|
# file_dict = smog_chamber_file_reader.read_txt_files_as_dict(os.path.join(dirpath,filename),'gas')
|
||||||
|
|
||||||
|
file_dict = file_obj
|
||||||
|
|
||||||
# TODO: create datasets of compound data type to include variable/or column names and datetimestamps
|
# TODO: create datasets of compound data type to include variable/or column names and datetimestamps
|
||||||
h5file[group_name].create_group(filename)
|
h5file[group_name].create_group(filename)
|
||||||
@ -469,13 +502,10 @@ def main_5505():
|
|||||||
|
|
||||||
inputfile_dir = '\\\\fs101\\5505\\People\\Juan\\TypicalBeamTime'
|
inputfile_dir = '\\\\fs101\\5505\\People\\Juan\\TypicalBeamTime'
|
||||||
|
|
||||||
file_dict = g5505_file_reader.read_xps_ibw_file_as_dict(inputfile_dir+'\\SES\\0069069_N1s_495eV.ibw')
|
|
||||||
group_by_type = lambda x : utils.group_by_df_column(x,'filetype')
|
|
||||||
|
|
||||||
select_file_keywords=[]
|
select_file_keywords=[]
|
||||||
select_dir_keywords = ['NEXAFS', 'Notes', 'Photos', 'Pressure', 'RGA', 'SES']
|
select_dir_keywords = ['NEXAFS', 'Notes', 'Photos', 'Pressure', 'RGA', 'SES']
|
||||||
create_hdf5_file_from_filesystem_path('test_sls_data.h5',inputfile_dir,select_dir_keywords,select_file_keywords)
|
create_hdf5_file_from_filesystem_path('test_sls_data_v1.h5',inputfile_dir,select_dir_keywords,select_file_keywords)
|
||||||
display_group_hierarchy_on_a_treemap('test_smog_chamber_v5.h5')
|
display_group_hierarchy_on_a_treemap('test_sls_data_v1.h5')
|
||||||
|
|
||||||
#create_hdf5_file('test', inputfile_dir, 'Topdown', [group_by_type], extract_attrs_func = None)
|
#create_hdf5_file('test', inputfile_dir, 'Topdown', [group_by_type], extract_attrs_func = None)
|
||||||
|
|
||||||
@ -486,8 +516,8 @@ def main_smog_chamber():
|
|||||||
include_list = ['gas','smps\\20220726','htof\\2022.07.26','ptr\\2022.07.26','ams\\2022.07.26']
|
include_list = ['gas','smps\\20220726','htof\\2022.07.26','ptr\\2022.07.26','ams\\2022.07.26']
|
||||||
select_date_list = ['20220726','2022.07.26']
|
select_date_list = ['20220726','2022.07.26']
|
||||||
|
|
||||||
create_hdf5_file_from_filesystem_path('test_smog_chamber_v5.h5',inputfile_dir,include_list,select_date_list)
|
create_hdf5_file_from_filesystem_path('test_smog_chamber_v6.h5',inputfile_dir,include_list,select_date_list)
|
||||||
display_group_hierarchy_on_a_treemap('test_smog_chamber_v5.h5')
|
display_group_hierarchy_on_a_treemap('test_smog_chamber_v6.h5')
|
||||||
|
|
||||||
def main_mtable_h5_from_dataframe():
|
def main_mtable_h5_from_dataframe():
|
||||||
|
|
||||||
@ -533,7 +563,9 @@ def main_mtable_h5_from_dataframe():
|
|||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
|
|
||||||
main_mtable_h5_from_dataframe()
|
#main_smog_chamber()
|
||||||
|
#main_mtable_h5_from_dataframe()
|
||||||
|
main_5505()
|
||||||
|
|
||||||
print(':)')
|
print(':)')
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user