diff --git a/src/hdf5_lib.py b/src/hdf5_lib.py
index af5c0c2..32fcfc9 100644
--- a/src/hdf5_lib.py
+++ b/src/hdf5_lib.py
@@ -333,7 +333,24 @@ def create_hdf5_file_from_filesystem_path(ofilename : str, input_file_system_pat
 
                 #try:
                 if not 'h5' in filename:
-                    file_obj = config_file.ext_to_reader_dict[file_ext](os.path.join(dirpath,filename))
+                    file_dict = config_file.ext_to_reader_dict[file_ext](os.path.join(dirpath,filename))
+
+                    if not file_dict:
+                        continue
+
+                    # file_dict = file_obj
+                    # Create group and add their attributes
+                    h5file[group_name].create_group(name=file_dict['name'])
+                    for key in file_dict['attributes_dict'].keys():
+                        h5file[group_name][file_dict['name']].attrs.create(name=key,data=file_dict['attributes_dict'][key])
+
+                    # Add datasets to just created group
+                    for dataset in file_dict['datasets']:
+                        h5file[group_name][file_dict['name']].create_dataset(name = dataset['name'],
+                                                                             data = dataset['data'],
+                                                                             #dtype = file_dict['dtype'],
+                                                                             shape = dataset['shape'])
+
                 else:
                     config_file.ext_to_reader_dict[file_ext](source_file_path = os.path.join(dirpath,filename),
                                                              dest_file_obj = h5file,
@@ -343,19 +360,14 @@ def create_hdf5_file_from_filesystem_path(ofilename : str, input_file_system_pat
-            if 'ibw' in filename:
+            #if 'ibw' in filename:
                 #file_dict = g5505_file_reader.read_xps_ibw_file_as_dict(os.path.join(dirpath,filename))
-                file_dict = file_obj
-
-                h5file[group_name].create_dataset(name = file_dict['name'],
-                                                  data = file_dict['data'],
-                                                  #dtype = file_dict['dtype'],
-                                                  shape = file_dict['shape'])
+
+                #h5file[group_name][file_dict['name']].dims[0] = file_dict['dimension_units']
 
-                for key in file_dict['attributes_dict'].keys():
-                    h5file[group_name][file_dict['name']].attrs.create(name=key,data=file_dict['attributes_dict'][key])
+
+            #if 'h5' in filename:
                 # Create copy of original file to avoid possible file corruption and work with it.
@@ -372,29 +384,29 @@ def create_hdf5_file_from_filesystem_path(ofilename : str, input_file_system_pat
 
 
             # TODO: generilize to multiphase chemistry text and dat files
             # TODO: include header information from files as well
-            if ('txt' in filename or 'TXT' in filename) and any([item in os.path.join(dirpath,filename) for item in ['smps','gas']]):
-                #if 'smps' in os.path.join(dirpath,filename):
-                # file_dict = smog_chamber_file_reader.read_txt_files_as_dict(os.path.join(dirpath,filename),'smps')
-                #elif 'gas' in os.path.join(dirpath,filename):
+#            if ('txt' in filename or 'TXT' in filename) and any([item in os.path.join(dirpath,filename) for item in ['smps','gas']]):
+#                #if 'smps' in os.path.join(dirpath,filename):
+#                # file_dict = smog_chamber_file_reader.read_txt_files_as_dict(os.path.join(dirpath,filename),'smps')
+#                #elif 'gas' in os.path.join(dirpath,filename):
                 # file_dict = smog_chamber_file_reader.read_txt_files_as_dict(os.path.join(dirpath,filename),'gas')
-                file_dict = file_obj
+#                file_dict = file_obj
 
-                # TODO: create datasets of compound data type to include variable/or column names and datetimestamps
-                h5file[group_name].create_group(filename)
-                h5file[group_name][filename].create_dataset(name = 'data',
-                                                            data = file_dict['data'],
+#                # TODO: create datasets of compound data type to include variable/or column names and datetimestamps
+#                h5file[group_name].create_group(filename)
+#                h5file[group_name][filename].create_dataset(name = 'data',
+#                                                            data = file_dict['data'],
                                                             #dtype = file_dict['dtype'],
-                                                            shape = file_dict['data'].shape)
+#                                                            shape = file_dict['data'].shape)
 
-                h5file[group_name][filename].create_dataset(name = 'data_column_names',
-                                                            data = np.array(file_dict['data_column_names']),
+#                h5file[group_name][filename].create_dataset(name = 'data_column_names',
+#                                                            data = np.array(file_dict['data_column_names']),
                                                             #dtype = file_dict['dtype'],
-                                                            shape = np.array(file_dict['data_column_names']).shape)
+#                                                            shape = np.array(file_dict['data_column_names']).shape)
 
-                for key in file_dict['categ_data_dict'].keys():
-                    h5file[group_name][filename].create_dataset(name=key,data=file_dict['categ_data_dict'][key])
+#                for key in file_dict['categ_data_dict'].keys():
+#                    h5file[group_name][filename].create_dataset(name=key,data=file_dict['categ_data_dict'][key])
 
 
 def create_hdf5_file_from_dataframe(ofilename, input_data, approach : str, group_by_funcs : list, extract_attrs_func = None):
@@ -504,20 +516,23 @@ def main_5505():
 
     select_file_keywords=[]
     select_dir_keywords = ['NEXAFS', 'Notes', 'Photos', 'Pressure', 'RGA', 'SES']
 
-    create_hdf5_file_from_filesystem_path('test_sls_data_v1.h5',inputfile_dir,select_dir_keywords,select_file_keywords)
-    display_group_hierarchy_on_a_treemap('test_sls_data_v1.h5')
+    create_hdf5_file_from_filesystem_path('test_sls_data_v3.h5',inputfile_dir,select_dir_keywords,select_file_keywords)
+    display_group_hierarchy_on_a_treemap('test_sls_data_v3.h5')
 
     #create_hdf5_file('test', inputfile_dir, 'Topdown', [group_by_type], extract_attrs_func = None)
 
-def main_smog_chamber():
-
-    inputfile_dir = '\\\\fs03\\Iron_Sulphate'
-    include_list = ['htof','ams', 'ptr', 'gas','smps']
-    include_list = ['gas','smps\\20220726','htof\\2022.07.26','ptr\\2022.07.26','ams\\2022.07.26']
-    select_date_list = ['20220726','2022.07.26']
+def main():
 
-    create_hdf5_file_from_filesystem_path('test_smog_chamber_v6.h5',inputfile_dir,include_list,select_date_list)
-    display_group_hierarchy_on_a_treemap('test_smog_chamber_v6.h5')
+
+
+    #select_dir_keywords = ['htof','ams', 'ptr', 'gas','smps']
+    output_filename = config_file.output_filename
+    inputfile_dir = config_file.inputfile_dir #'\\\\fs03\\Iron_Sulphate'
+    select_dir_keywords = config_file.select_dir_keywords #['gas','smps\\20220726','htof\\2022.07.26','ptr\\2022.07.26','ams\\2022.07.26']
+    select_file_keywords = config_file.select_file_keywords #['20220726','2022.07.26']
+
+    create_hdf5_file_from_filesystem_path(output_filename,inputfile_dir,select_dir_keywords,select_file_keywords)
+    display_group_hierarchy_on_a_treemap(output_filename)
 
 
 def main_mtable_h5_from_dataframe():
@@ -563,9 +578,9 @@ def main_mtable_h5_from_dataframe():
 
 
 if __name__ == '__main__':
 
-    #main_smog_chamber()
+    main()
     #main_mtable_h5_from_dataframe()
-    main_5505()
+    #main_5505()
 
     print(':)')
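
Note (not part of the patch): the new non-HDF5 branch above assumes that every reader registered in config_file.ext_to_reader_dict for non-HDF5 extensions returns either a falsy value (in which case the file is skipped) or a dict with the keys 'name', 'attributes_dict', and 'datasets', where 'datasets' is a list of dicts carrying 'name', 'data', and 'shape'. A minimal sketch of a reader that satisfies this contract is shown below; the key names come from the diff, while the function name, file format, and attribute values are illustrative assumptions only.

    # Hypothetical reader sketch: returns the dict layout consumed by the branch above.
    import os
    import numpy as np

    def read_plain_numeric_file_as_dict(path):
        """Assumed example reader for a whitespace-separated numeric text file."""
        data = np.loadtxt(path)
        if data.size == 0:
            # A falsy return value makes the caller skip the file (see 'if not file_dict: continue').
            return {}
        return {
            'name': os.path.splitext(os.path.basename(path))[0],      # becomes the new group's name
            'attributes_dict': {'source_path': path},                  # copied onto the group via attrs.create()
            'datasets': [
                {'name': 'data', 'data': data, 'shape': data.shape},   # each entry becomes one create_dataset() call
            ],
        }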
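
The refactored main() above also assumes that config_file exposes module-level settings (output_filename, inputfile_dir, select_dir_keywords, select_file_keywords) in addition to ext_to_reader_dict. A minimal sketch of such a config_file.py follows; the values simply echo the commented-out defaults in the diff, and the extension-key format of ext_to_reader_dict is an assumption.

    # config_file.py -- sketch of the module-level settings read by main() and by
    # create_hdf5_file_from_filesystem_path. Values mirror the commented-out defaults
    # in the diff; the actual project configuration may differ.
    output_filename = 'test_smog_chamber_v6.h5'
    inputfile_dir = '\\\\fs03\\Iron_Sulphate'
    select_dir_keywords = ['gas', 'smps\\20220726', 'htof\\2022.07.26', 'ptr\\2022.07.26', 'ams\\2022.07.26']
    select_file_keywords = ['20220726', '2022.07.26']

    # Maps a file extension to a reader callable, e.g. the sketch above for plain text
    # files; the real readers live elsewhere in the project.
    # ext_to_reader_dict = {'txt': read_plain_numeric_file_as_dict}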