Modified to receive a unified dictionary structure and transform it into an equivalent group, dataset, and attribute structure.
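The change relies on every reader in config_file.ext_to_reader_dict returning the same dictionary shape. A minimal sketch of that contract, inferred from the key names used in the diff below (the sample values are hypothetical, not taken from any real file):

    # Hypothetical example of the unified dictionary a reader is expected to
    # return; the key names come from the diff below, the values are
    # illustrative placeholders only.
    import numpy as np

    file_dict = {
        'name': 'example_file',            # becomes the HDF5 group created under group_name
        'attributes_dict': {               # copied onto that group as HDF5 attributes
            'instrument': 'hypothetical_instrument',
            'start_time': '2022-07-26 00:00:00',
        },
        'datasets': [                      # each entry becomes one dataset inside the group
            {'name': 'data', 'data': np.zeros((3, 2)), 'shape': (3, 2)},
        ],
    }

With this shape in hand, the file-type-specific branches below collapse into one generic loop: create a group named file_dict['name'], attach every entry of attributes_dict as an attribute, then create one dataset per entry of the datasets list.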
@@ -333,7 +333,24 @@ def create_hdf5_file_from_filesystem_path(ofilename : str, input_file_system_pat
 
         #try:
             if not 'h5' in filename:
-                file_obj = config_file.ext_to_reader_dict[file_ext](os.path.join(dirpath,filename))
+                file_dict = config_file.ext_to_reader_dict[file_ext](os.path.join(dirpath,filename))
+
+                if not file_dict:
+                    continue
+
+                # file_dict = file_obj
+                # Create group and add their attributes
+                h5file[group_name].create_group(name=file_dict['name'])
+                for key in file_dict['attributes_dict'].keys():
+                    h5file[group_name][file_dict['name']].attrs.create(name=key,data=file_dict['attributes_dict'][key])
+
+                # Add datasets to just created group
+                for dataset in file_dict['datasets']:
+                    h5file[group_name][file_dict['name']].create_dataset(name = dataset['name'],
+                                                                         data = dataset['data'],
+                                                                         #dtype = file_dict['dtype'],
+                                                                         shape = dataset['shape'])
+
             else:
                 config_file.ext_to_reader_dict[file_ext](source_file_path = os.path.join(dirpath,filename),
                                                          dest_file_obj = h5file,
@@ -343,19 +360,14 @@ def create_hdf5_file_from_filesystem_path(ofilename : str, input_file_system_pat
 
 
 
-            if 'ibw' in filename:
+            #if 'ibw' in filename:
                 #file_dict = g5505_file_reader.read_xps_ibw_file_as_dict(os.path.join(dirpath,filename))
-                file_dict = file_obj
 
-                h5file[group_name].create_dataset(name = file_dict['name'],
-                                                  data = file_dict['data'],
-                                                  #dtype = file_dict['dtype'],
-                                                  shape = file_dict['shape'])
 
                 #h5file[group_name][file_dict['name']].dims[0] = file_dict['dimension_units']
 
-                for key in file_dict['attributes_dict'].keys():
-                    h5file[group_name][file_dict['name']].attrs.create(name=key,data=file_dict['attributes_dict'][key])
 
             #if 'h5' in filename:
                 # Create copy of original file to avoid possible file corruption and work with it.
@@ -372,29 +384,29 @@ def create_hdf5_file_from_filesystem_path(ofilename : str, input_file_system_pat
 
             # TODO: generilize to multiphase chemistry text and dat files
             # TODO: include header information from files as well
-            if ('txt' in filename or 'TXT' in filename) and any([item in os.path.join(dirpath,filename) for item in ['smps','gas']]):
-                #if 'smps' in os.path.join(dirpath,filename):
-                #    file_dict = smog_chamber_file_reader.read_txt_files_as_dict(os.path.join(dirpath,filename),'smps')
-                #elif 'gas' in os.path.join(dirpath,filename):
+            # if ('txt' in filename or 'TXT' in filename) and any([item in os.path.join(dirpath,filename) for item in ['smps','gas']]):
+                # #if 'smps' in os.path.join(dirpath,filename):
+                # #    file_dict = smog_chamber_file_reader.read_txt_files_as_dict(os.path.join(dirpath,filename),'smps')
+                # #elif 'gas' in os.path.join(dirpath,filename):
                 #    file_dict = smog_chamber_file_reader.read_txt_files_as_dict(os.path.join(dirpath,filename),'gas')
 
-                file_dict = file_obj
+                # file_dict = file_obj
 
-                # TODO: create datasets of compound data type to include variable/or column names and datetimestamps
-                h5file[group_name].create_group(filename)
-                h5file[group_name][filename].create_dataset(name = 'data',
-                                                            data = file_dict['data'],
+                # # TODO: create datasets of compound data type to include variable/or column names and datetimestamps
+                # h5file[group_name].create_group(filename)
+                # h5file[group_name][filename].create_dataset(name = 'data',
+                #                                             data = file_dict['data'],
                                                             #dtype = file_dict['dtype'],
-                                                            shape = file_dict['data'].shape)
+                #                                             shape = file_dict['data'].shape)
 
 
-                h5file[group_name][filename].create_dataset(name = 'data_column_names',
-                                                            data = np.array(file_dict['data_column_names']),
+                # h5file[group_name][filename].create_dataset(name = 'data_column_names',
+                #                                             data = np.array(file_dict['data_column_names']),
                                                             #dtype = file_dict['dtype'],
-                                                            shape = np.array(file_dict['data_column_names']).shape)
+                #                                             shape = np.array(file_dict['data_column_names']).shape)
 
-                for key in file_dict['categ_data_dict'].keys():
-                    h5file[group_name][filename].create_dataset(name=key,data=file_dict['categ_data_dict'][key])
+                # for key in file_dict['categ_data_dict'].keys():
+                #     h5file[group_name][filename].create_dataset(name=key,data=file_dict['categ_data_dict'][key])
 
 
 def create_hdf5_file_from_dataframe(ofilename, input_data, approach : str, group_by_funcs : list, extract_attrs_func = None):
@@ -504,20 +516,23 @@ def main_5505():
 
     select_file_keywords=[]
     select_dir_keywords = ['NEXAFS', 'Notes', 'Photos', 'Pressure', 'RGA', 'SES']
-    create_hdf5_file_from_filesystem_path('test_sls_data_v1.h5',inputfile_dir,select_dir_keywords,select_file_keywords)
-    display_group_hierarchy_on_a_treemap('test_sls_data_v1.h5')
+    create_hdf5_file_from_filesystem_path('test_sls_data_v3.h5',inputfile_dir,select_dir_keywords,select_file_keywords)
+    display_group_hierarchy_on_a_treemap('test_sls_data_v3.h5')
 
     #create_hdf5_file('test', inputfile_dir, 'Topdown', [group_by_type], extract_attrs_func = None)
 
-def main_smog_chamber():
+def main():
 
-    inputfile_dir = '\\\\fs03\\Iron_Sulphate'
-    include_list = ['htof','ams', 'ptr', 'gas','smps']
-    include_list = ['gas','smps\\20220726','htof\\2022.07.26','ptr\\2022.07.26','ams\\2022.07.26']
-    select_date_list = ['20220726','2022.07.26']
-
-    create_hdf5_file_from_filesystem_path('test_smog_chamber_v6.h5',inputfile_dir,include_list,select_date_list)
-    display_group_hierarchy_on_a_treemap('test_smog_chamber_v6.h5')
+    #select_dir_keywords = ['htof','ams', 'ptr', 'gas','smps']
+    output_filename = config_file.output_filename
+    inputfile_dir = config_file.inputfile_dir #'\\\\fs03\\Iron_Sulphate'
+    select_dir_keywords = config_file.select_dir_keywords #['gas','smps\\20220726','htof\\2022.07.26','ptr\\2022.07.26','ams\\2022.07.26']
+    select_file_keywords = config_file.select_file_keywords #['20220726','2022.07.26']
 
+    create_hdf5_file_from_filesystem_path(output_filename,inputfile_dir,select_dir_keywords,select_file_keywords)
+    display_group_hierarchy_on_a_treemap(output_filename)
 
 def main_mtable_h5_from_dataframe():
 
@@ -563,9 +578,9 @@ def main_mtable_h5_from_dataframe():
 
 if __name__ == '__main__':
 
-    #main_smog_chamber()
+    main()
     #main_mtable_h5_from_dataframe()
-    main_5505()
+    #main_5505()
 
     print(':)')
 
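The rewritten main() pulls all run parameters from a config_file module instead of hard-coding them. A minimal sketch of what that module might expose, using only the attribute names that appear in the diff; the concrete values simply mirror the commented-out defaults and are illustrative, not authoritative:

    # config_file.py -- hypothetical sketch; attribute names are taken from the
    # diff above, the values are illustrative placeholders.
    import g5505_file_reader  # assumed reader module, referenced in the diff

    output_filename = 'test_smog_chamber_v6.h5'
    inputfile_dir = '\\\\fs03\\Iron_Sulphate'
    select_dir_keywords = ['gas', 'smps\\20220726', 'htof\\2022.07.26', 'ptr\\2022.07.26', 'ams\\2022.07.26']
    select_file_keywords = ['20220726', '2022.07.26']

    # Maps a file extension to a reader. Per the diff, a reader either returns
    # the unified dictionary or accepts (source_file_path, dest_file_obj) and
    # writes straight into the destination HDF5 file.
    ext_to_reader_dict = {
        'ibw': g5505_file_reader.read_xps_ibw_file_as_dict,
    }

Centralizing these in one module means switching between the SLS and smog-chamber campaigns only requires editing config_file, not the directory-traversal code.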