Merge branch 'main' of https://gitlab.psi.ch/5505/dima
This commit is contained in:
@ -481,8 +481,8 @@ import os
|
||||
#import src.hdf5_lib as h5lib
|
||||
import src.g5505_utils as utils
|
||||
import h5py
|
||||
|
||||
def save_processed_dataframe_to_hdf5(df, annotator, src_hdf5_path, script_date, script_name):
|
||||
import src.metadata_review_lib as metadata_lib
|
||||
def save_processed_dataframe_to_hdf5(df, annotator, output_filename): # src_hdf5_path, script_date, script_name):
|
||||
"""
|
||||
Save processed dataframe columns with annotations to an HDF5 file.
|
||||
|
||||
@ -495,7 +495,9 @@ def save_processed_dataframe_to_hdf5(df, annotator, src_hdf5_path, script_date,
|
||||
"""
|
||||
# Convert datetime columns to string
|
||||
datetime_cols = df.select_dtypes(include=['datetime64']).columns
|
||||
df[datetime_cols] = df[datetime_cols].apply(str)
|
||||
|
||||
if list(datetime_cols):
|
||||
df[datetime_cols] = df[datetime_cols].map(str)
|
||||
|
||||
# Convert dataframe to structured array
|
||||
icad_data_table = utils.dataframe_to_np_structured_array(df)
|
||||
@ -503,6 +505,10 @@ def save_processed_dataframe_to_hdf5(df, annotator, src_hdf5_path, script_date,
|
||||
# Get metadata
|
||||
metadata_dict = annotator.get_metadata()
|
||||
|
||||
# Prepare project level attributes to be added at the root level
|
||||
|
||||
project_level_attributes = metadata_dict['metadata']['project']
|
||||
|
||||
# Prepare high-level attributes
|
||||
high_level_attributes = {
|
||||
'parent_files': metadata_dict['parent_files'],
|
||||
@ -514,13 +520,14 @@ def save_processed_dataframe_to_hdf5(df, annotator, src_hdf5_path, script_date,
|
||||
# Prepare data level attributes
|
||||
data_level_attributes = metadata_dict['metadata']['datasets']
|
||||
|
||||
# Generate output filename
|
||||
parent_file_name = os.path.split(src_hdf5_path)[1]
|
||||
output_filename = f'data_products/processed/fig_{script_date}_{parent_file_name}'
|
||||
for key, value in data_level_attributes.items():
|
||||
if isinstance(value,dict):
|
||||
data_level_attributes[key] = metadata_lib.parse_attribute(value)
|
||||
|
||||
|
||||
# Prepare file dictionary
|
||||
file_dict = {
|
||||
'name': script_name,
|
||||
'name': project_level_attributes['script_name'],
|
||||
'attributes_dict': high_level_attributes,
|
||||
'datasets': [{
|
||||
'name': "data_table",
|
||||
@ -530,8 +537,19 @@ def save_processed_dataframe_to_hdf5(df, annotator, src_hdf5_path, script_date,
|
||||
}]
|
||||
}
|
||||
|
||||
# Check if the file exists
|
||||
if os.path.exists(output_filename):
|
||||
mode = "a"
|
||||
print(f"File {output_filename} exists. Opening in append mode.")
|
||||
else:
|
||||
mode = "w"
|
||||
print(f"File {output_filename} does not exist. Creating a new file.")
|
||||
|
||||
|
||||
# Write to HDF5
|
||||
with h5py.File(output_filename, 'w') as h5file:
|
||||
with h5py.File(output_filename, mode) as h5file:
|
||||
# Add project level attributes at the root/top level
|
||||
h5file.attrs.update(project_level_attributes)
|
||||
transfer_file_dict_to_hdf5(h5file, '/', file_dict)
|
||||
|
||||
|
||||
|
@ -322,30 +322,55 @@ class MetadataHarvester:
|
||||
parent_files = []
|
||||
self.parent_files = parent_files
|
||||
self.metadata = {
|
||||
"project": {},
|
||||
"sample": {},
|
||||
"environment": {},
|
||||
"instruments": {},
|
||||
"datasets": {}
|
||||
}
|
||||
|
||||
def add_sample_info(self, key_or_dict, value=None):
|
||||
self._add_info("sample", key_or_dict, value)
|
||||
def add_project_info(self, key_or_dict, value=None, append=False):
|
||||
self._add_info("project", key_or_dict, value, append)
|
||||
|
||||
def add_environment_info(self, key_or_dict, value=None):
|
||||
self._add_info("environment", key_or_dict, value)
|
||||
def add_sample_info(self, key_or_dict, value=None, append=False):
|
||||
self._add_info("sample", key_or_dict, value, append)
|
||||
|
||||
def add_instrument_info(self, key_or_dict, value=None):
|
||||
self._add_info("instruments", key_or_dict, value)
|
||||
def add_environment_info(self, key_or_dict, value=None, append=False):
|
||||
self._add_info("environment", key_or_dict, value, append)
|
||||
|
||||
def add_dataset_info(self, key_or_dict, value=None):
|
||||
self._add_info("datasets", key_or_dict, value)
|
||||
def add_instrument_info(self, key_or_dict, value=None, append=False):
|
||||
self._add_info("instruments", key_or_dict, value, append)
|
||||
|
||||
def _add_info(self, category, key_or_dict, value):
|
||||
def add_dataset_info(self, key_or_dict, value=None, append=False):
|
||||
self._add_info("datasets", key_or_dict, value, append)
|
||||
|
||||
def _add_info(self, category, key_or_dict, value, append):
|
||||
"""Internal helper method to add information to a category."""
|
||||
if isinstance(key_or_dict, dict):
|
||||
self.metadata[category].update(key_or_dict)
|
||||
else:
|
||||
self.metadata[category][key_or_dict] = value
|
||||
if key_or_dict in self.metadata[category]:
|
||||
if append:
|
||||
current_value = self.metadata[category][key_or_dict]
|
||||
|
||||
if isinstance(current_value, list):
|
||||
|
||||
if not isinstance(value, list):
|
||||
# Append the new value to the list
|
||||
self.metadata[category][key_or_dict].append(value)
|
||||
else:
|
||||
self.metadata[category][key_or_dict] = current_value + value
|
||||
|
||||
elif isinstance(current_value, str):
|
||||
# Append the new value as a comma-separated string
|
||||
self.metadata[category][key_or_dict] = current_value + ',' + str(value)
|
||||
else:
|
||||
# Handle other types (for completeness, usually not required)
|
||||
self.metadata[category][key_or_dict] = [current_value, value]
|
||||
else:
|
||||
self.metadata[category][key_or_dict] = value
|
||||
else:
|
||||
self.metadata[category][key_or_dict] = value
|
||||
|
||||
def get_metadata(self):
|
||||
return {
|
||||
@ -365,6 +390,7 @@ class MetadataHarvester:
|
||||
|
||||
def clear_metadata(self):
|
||||
self.metadata = {
|
||||
"project": {},
|
||||
"sample": {},
|
||||
"environment": {},
|
||||
"instruments": {},
|
||||
|
Reference in New Issue
Block a user