Updated function create_hdf5_file_from_filesystem_path(ofilename, input_file_system_path), which can now integrate XPS .ibw files as datasets in the resulting file.
This commit is contained in:
129
hdf5_lib.py
129
hdf5_lib.py
@@ -9,12 +9,12 @@ import matplotlib.pyplot as plt
|
|||||||
import plotly.express as px
|
import plotly.express as px
|
||||||
import plotly.graph_objects as go
|
import plotly.graph_objects as go
|
||||||
from plotly.subplots import make_subplots
|
from plotly.subplots import make_subplots
|
||||||
import igor2
|
|
||||||
from igor2.binarywave import load as loadibw
|
import g5505_file_reader
|
||||||
|
|
||||||
def read_mtable_as_dataframe(filename):
|
def read_mtable_as_dataframe(filename):
|
||||||
|
|
||||||
""" Reconstruct a Matlab Table encoded in a .h5 file as a Pandas DataFrame. The input h5. file
|
""" Reconstruct a Matlab Table encoded in a .h5 file as a Pandas DataFrame. The input .h5 file
|
||||||
contains as many groups as rows in the Matlab Table, and each group stores dataset-like variables in the Table as
|
contains as many groups as rows in the Matlab Table, and each group stores dataset-like variables in the Table as
|
||||||
Datasets while categorical and numerical variables in the table are represented as attributes of each group.
|
Datasets while categorical and numerical variables in the table are represented as attributes of each group.
|
||||||
|
|
||||||
@@ -70,6 +70,35 @@ def read_mtable_as_dataframe(filename):
|
|||||||
output_dataframe.loc[:,column_label] = tmp_col
|
output_dataframe.loc[:,column_label] = tmp_col
|
||||||
|
|
||||||
return output_dataframe
|
return output_dataframe
|
||||||
|
|
||||||
|
def create_group_hierarchy(obj, df, columns):
|
||||||
|
|
||||||
|
"""
|
||||||
|
Input:
|
||||||
|
obj (h5py.File or h5py.Group)
|
||||||
|
columns (list of strs): denote categorical columns in df to be used to define hdf5 file group hierarchy
|
||||||
|
"""
|
||||||
|
|
||||||
|
if not columns:
|
||||||
|
return
|
||||||
|
|
||||||
|
# Determine categories associated with first categorical column
|
||||||
|
unique_values = df[columns[0]].unique()
|
||||||
|
|
||||||
|
if obj.name == '/':
|
||||||
|
obj.attrs.create('count',df.shape[0])
|
||||||
|
|
||||||
|
for group_name in unique_values:
|
||||||
|
|
||||||
|
group = obj.require_group(group_name)
|
||||||
|
group.attrs.create('column_name', columns[0])
|
||||||
|
|
||||||
|
sub_df = df[df[columns[0]]==group_name] # same as df.loc[df[columns[0]]==group_name,:]
|
||||||
|
group.attrs.create('count',sub_df.shape[0])
|
||||||
|
|
||||||
|
# if group_name == 'MgO powder,H2O,HCl':
|
||||||
|
# print('Here:',sub_df.shape)
|
||||||
|
create_group_hierarchy(group, sub_df, columns[1::])
|
||||||
|
|
||||||
def is_callable_list(x : list):
|
def is_callable_list(x : list):
|
||||||
return all([callable(item) for item in x])
|
return all([callable(item) for item in x])
|
||||||
@@ -113,51 +142,27 @@ def get_attr_names(input_data):
|
|||||||
raise ValueError("input_data must be a pd.DataFrame")
|
raise ValueError("input_data must be a pd.DataFrame")
|
||||||
|
|
||||||
return input_data.columns
|
return input_data.columns
|
||||||
|
|
||||||
def create_group_hierarchy(obj, df, columns):
|
|
||||||
|
|
||||||
"""
|
|
||||||
Input:
|
|
||||||
obj (h5py.File or h5py.Group)
|
|
||||||
columns (list of strs): denote categorical columns in df to be used to define hdf5 file group hierarchy
|
|
||||||
"""
|
|
||||||
|
|
||||||
if not columns:
|
|
||||||
return
|
|
||||||
|
|
||||||
# Determine categories associated with first categorical column
|
|
||||||
unique_values = df[columns[0]].unique()
|
|
||||||
|
|
||||||
if obj.name == '/':
|
|
||||||
obj.attrs.create('count',df.shape[0])
|
|
||||||
|
|
||||||
for group_name in unique_values:
|
|
||||||
|
|
||||||
group = obj.require_group(group_name)
|
|
||||||
group.attrs.create('column_name', columns[0])
|
|
||||||
|
|
||||||
sub_df = df[df[columns[0]]==group_name] # same as df.loc[df[columns[0]]==group_name,:]
|
|
||||||
group.attrs.create('count',sub_df.shape[0])
|
|
||||||
|
|
||||||
# if group_name == 'MgO powder,H2O,HCl':
|
|
||||||
# print('Here:',sub_df.shape)
|
|
||||||
create_group_hierarchy(group, sub_df, columns[1::])
|
|
||||||
|
|
||||||
def get_parent_child_relationships(file: h5py.File):
|
def get_parent_child_relationships(file: h5py.File):
|
||||||
|
|
||||||
nodes = ['/']
|
nodes = ['/']
|
||||||
parent = ['']
|
parent = ['']
|
||||||
values = [file.attrs['count']]
|
#values = [file.attrs['count']]
|
||||||
|
values = [len(file.attrs['file_list'])]
|
||||||
|
|
||||||
def node_visitor(name,obj):
|
def node_visitor(name,obj):
|
||||||
if isinstance(obj,h5py.Group):
|
#if isinstance(obj,h5py.Group):
|
||||||
nodes.append(obj.name)
|
nodes.append(obj.name)
|
||||||
parent.append(obj.parent.name)
|
parent.append(obj.parent.name)
|
||||||
#nodes.append(os.path.split(obj.name)[1])
|
#nodes.append(os.path.split(obj.name)[1])
|
||||||
#parent.append(os.path.split(obj.parent.name)[1])
|
#parent.append(os.path.split(obj.parent.name)[1])
|
||||||
values.append(obj.attrs['count'])
|
if isinstance(obj,h5py.Dataset):
|
||||||
|
values.append(1)
|
||||||
|
else:
|
||||||
|
values.append(len(obj.attrs['file_list']))
|
||||||
|
|
||||||
file.visititems(node_visitor)
|
file.visititems(node_visitor)
|
||||||
|
|
||||||
return nodes, parent, values
|
return nodes, parent, values
|
||||||
|
|
||||||
|
|
||||||
@@ -207,8 +212,8 @@ def display_group_hierarchy_on_a_treemap(filename: str):
|
|||||||
fig.add_trace(go.Treemap(
|
fig.add_trace(go.Treemap(
|
||||||
labels=nodes, #formating_df['formated_names'][nodes],
|
labels=nodes, #formating_df['formated_names'][nodes],
|
||||||
parents=parents,#formating_df['formated_names'][parents],
|
parents=parents,#formating_df['formated_names'][parents],
|
||||||
#values=values,
|
values=values,
|
||||||
branchvalues='total',
|
branchvalues='remainder',
|
||||||
customdata= customdata_series,
|
customdata= customdata_series,
|
||||||
#marker=dict(
|
#marker=dict(
|
||||||
# colors=df_all_trees['color'],
|
# colors=df_all_trees['color'],
|
||||||
@@ -231,11 +236,13 @@ def annotate_root_dir(filename,annotation_dict: dict):
|
|||||||
|
|
||||||
def create_hdf5_file_from_filesystem_path(ofilename,input_file_system_path):
|
def create_hdf5_file_from_filesystem_path(ofilename,input_file_system_path):
|
||||||
|
|
||||||
with h5py.File(ofilename, 'w') as file:
|
with h5py.File(ofilename, 'w') as h5file:
|
||||||
|
|
||||||
root_dir = '?##'
|
root_dir = '?##'
|
||||||
|
|
||||||
for dirpath, dirnames, filenames in os.walk(input_file_system_path,topdown=True):
|
# loops over (or visits each) subdirectories from root directory defined by input_file_sytem_path to the lower
|
||||||
|
#level subfolders
|
||||||
|
for dirpath, dirnames, filenames_list in os.walk(input_file_system_path,topdown=True):
|
||||||
|
|
||||||
group_name = dirpath.replace(os.sep,'/')
|
group_name = dirpath.replace(os.sep,'/')
|
||||||
|
|
||||||
@@ -243,16 +250,30 @@ def create_hdf5_file_from_filesystem_path(ofilename,input_file_system_path):
|
|||||||
# Set root_dir to top directory path in input file system
|
# Set root_dir to top directory path in input file system
|
||||||
root_dir = group_name
|
root_dir = group_name
|
||||||
group_name = group_name.replace(root_dir,'/')
|
group_name = group_name.replace(root_dir,'/')
|
||||||
file.create_dataset(name='file_list',data=filenames)
|
#h5file.attrs.create(name='count',data=len(filenames_list))
|
||||||
file.attrs.create(name='count',data=len(filenames))
|
h5file.attrs.create(name='file_list',data=filenames_list)
|
||||||
else:
|
else:
|
||||||
group_name = group_name.replace(root_dir+'/','/')
|
group_name = group_name.replace(root_dir+'/','/')
|
||||||
# Group hierarchy is implicitly defined by the forward slashes
|
# Group hierarchy is implicitly defined by the forward slashes
|
||||||
file.create_group(group_name)
|
h5file.create_group(group_name)
|
||||||
file[group_name].create_dataset(name='file_list',data=filenames)
|
h5file[group_name].attrs.create(name='file_list',data=filenames_list)
|
||||||
file[group_name].attrs.create(name='count',data=len(filenames))
|
|
||||||
|
|
||||||
file.attrs['count'] = file.attrs['count'] + file[group_name].attrs['count']
|
# TODO: for each "admissible" file in filenames, create an associated dataset in the corresponding group (subdirectory)
|
||||||
|
|
||||||
|
for filename in filenames_list:
|
||||||
|
|
||||||
|
if 'ibw' in filename:
|
||||||
|
file_dict = g5505_file_reader.read_xps_ibw_file_as_dict(os.path.join(dirpath,filename))
|
||||||
|
|
||||||
|
h5file[group_name].create_dataset(name = file_dict['name'],
|
||||||
|
data = file_dict['data'],
|
||||||
|
#dtype = file_dict['dtype'],
|
||||||
|
shape = file_dict['shape'])
|
||||||
|
|
||||||
|
#h5file[group_name][file_dict['name']].dims[0] = file_dict['dimension_units']
|
||||||
|
|
||||||
|
for key in file_dict['attributes_dict'].keys():
|
||||||
|
h5file[group_name][file_dict['name']].attrs.create(name=key,data=file_dict['attributes_dict'][key])
|
||||||
|
|
||||||
|
|
||||||
def create_hdf5_file(ofilename, input_data, approach : str, group_by_funcs : list, extract_attrs_func = None):
|
def create_hdf5_file(ofilename, input_data, approach : str, group_by_funcs : list, extract_attrs_func = None):
|
||||||
@@ -398,14 +419,22 @@ def split_sample_col_into_sample_and_data_quality_cols(input_data: pd.DataFrame)
|
|||||||
|
|
||||||
return input_data
|
return input_data
|
||||||
|
|
||||||
def main():
|
|
||||||
|
|
||||||
inputfile_dir = 'Z:\\People\\Juan\\TypicalBeamTime'
|
|
||||||
|
def main1():
|
||||||
|
|
||||||
|
inputfile_dir = '\\\\fs101\\5505\\People\\Juan\\TypicalBeamTime'
|
||||||
|
|
||||||
|
#ibw_file = loadibw(inputfile_dir+'\\SES\\0069069_N1s_495eV.ibw')
|
||||||
|
|
||||||
|
file_dict = g5505_file_reader.read_xps_ibw_file_as_dict(inputfile_dir+'\\SES\\0069069_N1s_495eV.ibw')
|
||||||
|
|
||||||
group_by_type = lambda x : group_by_df_column(x,'filetype')
|
group_by_type = lambda x : group_by_df_column(x,'filetype')
|
||||||
create_hdf5_file_from_filesystem_path('test2.h5', inputfile_dir)
|
#create_hdf5_file_from_filesystem_path('test2.h5',inputfile_dir)
|
||||||
display_group_hierarchy_on_a_treemap('test2.h5')
|
display_group_hierarchy_on_a_treemap('test2.h5')
|
||||||
#create_hdf5_file('test', inputfile_dir, 'Topdown', [group_by_type], extract_attrs_func = None)
|
#create_hdf5_file('test', inputfile_dir, 'Topdown', [group_by_type], extract_attrs_func = None)
|
||||||
|
|
||||||
|
def main2():
|
||||||
# Read BeamTimeMetaData.h5, containing Thorsten's Matlab Table
|
# Read BeamTimeMetaData.h5, containing Thorsten's Matlab Table
|
||||||
input_data_df = read_mtable_as_dataframe('input_files\\BeamTimeMetaData.h5')
|
input_data_df = read_mtable_as_dataframe('input_files\\BeamTimeMetaData.h5')
|
||||||
|
|
||||||
@@ -446,5 +475,7 @@ def main():
|
|||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
|
|
||||||
main()
|
main1()
|
||||||
|
|
||||||
|
print(':)')
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user