diff --git a/hdf5_lib.py b/hdf5_lib.py index 9331519..08529ba 100644 --- a/hdf5_lib.py +++ b/hdf5_lib.py @@ -9,6 +9,8 @@ import matplotlib.pyplot as plt import plotly.express as px import plotly.graph_objects as go from plotly.subplots import make_subplots +import igor2 +from igor2.binarywave import load as loadibw def read_mtable_as_dataframe(filename): @@ -205,7 +207,7 @@ def display_group_hierarchy_on_a_treemap(filename: str): fig.add_trace(go.Treemap( labels=nodes, #formating_df['formated_names'][nodes], parents=parents,#formating_df['formated_names'][parents], - values=values, + #values=values, branchvalues='total', customdata= customdata_series, #marker=dict( @@ -226,7 +228,33 @@ def annotate_root_dir(filename,annotation_dict: dict): file.attrs.create('metadata_'+key, annotation_dict[key]) - + +def create_hdf5_file_from_filesystem_path(ofilename,input_file_system_path): + + with h5py.File(ofilename, 'w') as file: + + root_dir = '?##' + + for dirpath, dirnames, filenames in os.walk(input_file_system_path,topdown=True): + + group_name = dirpath.replace(os.sep,'/') + + if root_dir == '?##': + # Set root_dir to top directory path in input file system + root_dir = group_name + group_name = group_name.replace(root_dir,'/') + file.create_dataset(name='file_list',data=filenames) + file.attrs.create(name='count',data=len(filenames)) + else: + group_name = group_name.replace(root_dir+'/','/') + # Group hierarchy is implicitly defined by the forward slashes + file.create_group(group_name) + file[group_name].create_dataset(name='file_list',data=filenames) + file[group_name].attrs.create(name='count',data=len(filenames)) + + file.attrs['count'] = file.attrs['count'] + file[group_name].attrs['count'] + + def create_hdf5_file(ofilename, input_data, approach : str, group_by_funcs : list, extract_attrs_func = None): """ Creates an hdf5 file with as many levels as indicated by len(group_by_funcs). @@ -247,11 +275,19 @@ def create_hdf5_file(ofilename, input_data, approach : str, group_by_funcs : lis """ # Check whether input_data is a valid file-system path or a DataFrame - check_possible_path = lambda x : os.path.exists(input_data) if isinstance(input_data,str) else False + is_valid_path = lambda x : os.path.exists(input_data) if isinstance(input_data,str) else False + + if is_valid_path(input_data): - if check_possible_path(input_data): file_list = os.listdir(input_data) - df = pd.DataFrame(file_list,columns='filename') + + # Navigates file-system folders/directories from top to bottom. + #for dirpath, dirnames, filenames in os.walk(input_data,topdown=True): + + + #df = pd.DataFrame(file_list,columns=['filename']) + df = augment_with_filetype(df) + elif isinstance(input_data,pd.DataFrame): df = input_data.copy() else: @@ -364,6 +400,12 @@ def split_sample_col_into_sample_and_data_quality_cols(input_data: pd.DataFrame) def main(): + inputfile_dir = 'Z:\\People\\Juan\\TypicalBeamTime' + group_by_type = lambda x : group_by_df_column(x,'filetype') + create_hdf5_file_from_filesystem_path('test2.h5', inputfile_dir) + display_group_hierarchy_on_a_treemap('test2.h5') + #create_hdf5_file('test', inputfile_dir, 'Topdown', [group_by_type], extract_attrs_func = None) + # Read BeamTimeMetaData.h5, containing Thorsten's Matlab Table input_data_df = read_mtable_as_dataframe('input_files\\BeamTimeMetaData.h5')