Replaced the standalone function read_dataset_from_hdf5file(hdf5_file_path, dataset_path) with the method HDF5DataOpsManager.extract_dataset_as_dataframe(self, dataset_name)
This commit is contained in:
@ -22,9 +22,11 @@ import copy
|
|||||||
class HDF5DataOpsManager():
|
class HDF5DataOpsManager():
|
||||||
|
|
||||||
"""
|
"""
|
||||||
A class to handle HDF5 file operations.
|
A class to handle fundamental mid-level HDF5 file operations that power data updates, metadata revision, and data analysis
|
||||||
|
with hdf5 files encoding multi-instrument experimental campaign data.
|
||||||
|
|
||||||
Parameters:
|
Parameters:
|
||||||
|
-----------
|
||||||
path_to_file : str
|
path_to_file : str
|
||||||
path/to/hdf5file.
|
path/to/hdf5file.
|
||||||
mode : str
|
mode : str
|
||||||
@ -85,7 +87,6 @@ class HDF5DataOpsManager():
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def extract_dataset_as_dataframe(self,dataset_name):
|
def extract_dataset_as_dataframe(self,dataset_name):
|
||||||
"""
|
"""
|
||||||
returns a copy of the dataset content as a dataframe when possible, or otherwise as a numpy array
|
returns a copy of the dataset content as a dataframe when possible, or otherwise as a numpy array
|
||||||
@ -423,29 +424,6 @@ class HDF5DataOpsManager():
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def read_dataset_from_hdf5file(hdf5_file_path, dataset_path):
|
|
||||||
# Open the HDF5 file
|
|
||||||
with h5py.File(hdf5_file_path, 'r') as hdf:
|
|
||||||
# Load the dataset
|
|
||||||
dataset = hdf[dataset_path]
|
|
||||||
data = np.empty(dataset.shape, dtype=dataset.dtype)
|
|
||||||
dataset.read_direct(data)
|
|
||||||
df = pd.DataFrame(data)
|
|
||||||
|
|
||||||
for col_name in df.select_dtypes(exclude='number'):
|
|
||||||
df[col_name] = df[col_name].str.decode('utf-8') #apply(lambda x: x.decode('utf-8') if isinstance(x,bytes) else x)
|
|
||||||
## Extract metadata (attributes) and convert to a dictionary
|
|
||||||
#metadata = hdf5_vis.construct_attributes_dict(hdf[dataset_name].attrs)
|
|
||||||
## Create a one-row DataFrame with the metadata
|
|
||||||
#metadata_df = pd.DataFrame.from_dict(data, orient='columns')
|
|
||||||
return df
|
|
||||||
|
|
||||||
def get_parent_child_relationships(file: h5py.File):
|
def get_parent_child_relationships(file: h5py.File):
|
||||||
|
|
||||||
nodes = ['/']
|
nodes = ['/']
|
||||||
|
Reference in New Issue
Block a user