Partially implemented the function create_hdf5_file_from_filesystem_path, which reads a file system path and copies its directory structure into an HDF5 file. Next steps require aligning the structure with our FAIR data file hierarchy and adding data to it. Last commit of the year :). Happy holidays!
This commit is contained in:
52
hdf5_lib.py
52
hdf5_lib.py
@ -9,6 +9,8 @@ import matplotlib.pyplot as plt
|
||||
import plotly.express as px
|
||||
import plotly.graph_objects as go
|
||||
from plotly.subplots import make_subplots
|
||||
import igor2
|
||||
from igor2.binarywave import load as loadibw
|
||||
|
||||
def read_mtable_as_dataframe(filename):
|
||||
|
||||
@ -205,7 +207,7 @@ def display_group_hierarchy_on_a_treemap(filename: str):
|
||||
fig.add_trace(go.Treemap(
|
||||
labels=nodes, #formating_df['formated_names'][nodes],
|
||||
parents=parents,#formating_df['formated_names'][parents],
|
||||
values=values,
|
||||
#values=values,
|
||||
branchvalues='total',
|
||||
customdata= customdata_series,
|
||||
#marker=dict(
|
||||
@ -226,7 +228,33 @@ def annotate_root_dir(filename,annotation_dict: dict):
|
||||
file.attrs.create('metadata_'+key, annotation_dict[key])
|
||||
|
||||
|
||||
|
||||
|
||||
def create_hdf5_file_from_filesystem_path(ofilename, input_file_system_path):
    """Mirror a directory tree as a group hierarchy in a new HDF5 file.

    The top directory maps to the HDF5 root group ('/'), and every
    sub-directory maps to a group named after its path relative to the top
    directory (e.g. '/run1/raw'). Each group, including the root, receives:

    * a 'file_list' dataset with the names of the files located directly in
      the corresponding directory, and
    * a 'count' attribute with the number of those files.

    The root group's 'count' attribute holds the total number of files found
    in the whole tree (files in the top directory plus all sub-directories).

    Parameters
    ----------
    ofilename : str
        Path of the HDF5 file to create. An existing file is overwritten.
    input_file_system_path : str
        Path of the top directory whose structure is copied.

    Raises
    ------
    NotADirectoryError
        If `input_file_system_path` is not an existing directory. Checked
        before the output file is opened so that a wrong input path does not
        truncate an existing output file.
    """
    if not os.path.isdir(input_file_system_path):
        raise NotADirectoryError(
            f"'{input_file_system_path}' is not an existing directory."
        )

    # Explicit variable-length string dtype: without it an empty file list
    # would be stored as an empty float dataset instead of a string dataset.
    filename_dtype = h5py.string_dtype()

    with h5py.File(ofilename, 'w') as file:

        total_count = 0

        for dirpath, _dirnames, filenames in os.walk(input_file_system_path, topdown=True):

            # A relative path strips the top directory as a true prefix only.
            # str.replace would also rewrite later occurrences of the same
            # text and breaks when the input path ends with a separator.
            relative_path = os.path.relpath(dirpath, input_file_system_path)

            if relative_path == os.curdir:
                # Top directory: its files are attached to the root group.
                group = file
            else:
                # Group hierarchy is implicitly defined by the forward slashes.
                group = file.create_group('/' + relative_path.replace(os.sep, '/'))
                group.attrs.create(name='count', data=len(filenames))

            group.create_dataset(name='file_list', data=filenames, dtype=filename_dtype)
            total_count += len(filenames)

        # Written once at the end so each directory is counted exactly once.
        file.attrs.create(name='count', data=total_count)
|
||||
|
||||
|
||||
def create_hdf5_file(ofilename, input_data, approach : str, group_by_funcs : list, extract_attrs_func = None):
|
||||
|
||||
""" Creates an hdf5 file with as many levels as indicated by len(group_by_funcs).
|
||||
@ -247,11 +275,19 @@ def create_hdf5_file(ofilename, input_data, approach : str, group_by_funcs : lis
|
||||
"""
|
||||
|
||||
# Check whether input_data is a valid file-system path or a DataFrame
|
||||
check_possible_path = lambda x : os.path.exists(input_data) if isinstance(input_data,str) else False
|
||||
is_valid_path = lambda x : os.path.exists(input_data) if isinstance(input_data,str) else False
|
||||
|
||||
if is_valid_path(input_data):
|
||||
|
||||
if check_possible_path(input_data):
|
||||
file_list = os.listdir(input_data)
|
||||
df = pd.DataFrame(file_list,columns='filename')
|
||||
|
||||
# Navigates file-system folders/directories from top to bottom.
|
||||
#for dirpath, dirnames, filenames in os.walk(input_data,topdown=True):
|
||||
|
||||
|
||||
#df = pd.DataFrame(file_list,columns=['filename'])
|
||||
df = augment_with_filetype(df)
|
||||
|
||||
elif isinstance(input_data,pd.DataFrame):
|
||||
df = input_data.copy()
|
||||
else:
|
||||
@ -364,6 +400,12 @@ def split_sample_col_into_sample_and_data_quality_cols(input_data: pd.DataFrame)
|
||||
|
||||
def main():
|
||||
|
||||
inputfile_dir = 'Z:\\People\\Juan\\TypicalBeamTime'
|
||||
group_by_type = lambda x : group_by_df_column(x,'filetype')
|
||||
create_hdf5_file_from_filesystem_path('test2.h5', inputfile_dir)
|
||||
display_group_hierarchy_on_a_treemap('test2.h5')
|
||||
#create_hdf5_file('test', inputfile_dir, 'Topdown', [group_by_type], extract_attrs_func = None)
|
||||
|
||||
# Read BeamTimeMetaData.h5, containing Thorsten's Matlab Table
|
||||
input_data_df = read_mtable_as_dataframe('input_files\\BeamTimeMetaData.h5')
|
||||
|
||||
|
Reference in New Issue
Block a user