Developed a metadata harvesting object to facilitate metadata collection throughout the code.

This commit is contained in:
2024-06-13 15:47:02 +02:00
parent f43d86e729
commit 0fb14b7c6c

View File

@ -316,6 +316,82 @@ def last_submit_metadata_review(reviewer_attrs):
#import config_file
#import hdf5_vis
class MetadataHarvester:
    """Collect metadata entries grouped into a fixed set of categories.

    Entries are stored in ``self.metadata``, a dict with one sub-dict per
    category (``"samples"``, ``"environment"``, ``"instrument"`` and
    ``"data_level"``). The files the metadata refers to are kept in
    ``self.parent_files``.
    """

    # Single source of truth for the category names, shared by __init__ and
    # clear_metadata so the two can never drift apart.
    _CATEGORIES = ("samples", "environment", "instrument", "data_level")

    def __init__(self, parent_files=None):
        """Create a harvester with empty categories.

        Args:
            parent_files: Optional iterable of parent file names. A single
                string is treated as one file name. Defaults to no files.
        """
        if parent_files is None:
            parent_files = []
        elif isinstance(parent_files, str):
            # A bare string would otherwise be split into characters below.
            parent_files = [parent_files]
        # Copy so that later changes to the caller's list do not silently
        # alter this harvester's state.
        self.parent_files = list(parent_files)
        self.metadata = self._empty_metadata()

    @classmethod
    def _empty_metadata(cls):
        """Return a fresh dict holding one empty dict per category."""
        return {category: {} for category in cls._CATEGORIES}

    def add_sample_info(self, key_or_dict, value=None):
        """Add one key/value pair, or a dict of pairs, to ``"samples"``."""
        self._add_info("samples", key_or_dict, value)

    def add_environment_info(self, key_or_dict, value=None):
        """Add one key/value pair, or a dict of pairs, to ``"environment"``."""
        self._add_info("environment", key_or_dict, value)

    def add_instrument_info(self, key_or_dict, value=None):
        """Add one key/value pair, or a dict of pairs, to ``"instrument"``."""
        self._add_info("instrument", key_or_dict, value)

    def add_data_level_info(self, key_or_dict, value=None):
        """Add one key/value pair, or a dict of pairs, to ``"data_level"``."""
        self._add_info("data_level", key_or_dict, value)

    def _add_info(self, category, key_or_dict, value):
        """Internal helper method to add information to a category.

        When ``key_or_dict`` is a dict, all of its items are merged into the
        category and ``value`` is ignored; otherwise ``key_or_dict`` is used
        as a single key mapped to ``value``. Existing keys are overwritten.
        """
        if isinstance(key_or_dict, dict):
            self.metadata[category].update(key_or_dict)
        else:
            self.metadata[category][key_or_dict] = value

    def get_metadata(self):
        """Return the parent files and the collected metadata as one dict.

        The returned dict references the harvester's current containers
        (no copy is made), under the keys ``"parent_files"`` and
        ``"metadata"``.
        """
        return {
            "parent_files": self.parent_files,
            "metadata": self.metadata,
        }

    def clear_metadata(self):
        """Reset every category and the parent file list to empty.

        New containers are bound rather than emptied in place, so a dict
        obtained earlier from ``get_metadata`` keeps its contents.
        """
        self.metadata = self._empty_metadata()
        self.parent_files = []
# Example usage: a small demonstration that runs only when this file is
# executed directly as a script.
if __name__ == "__main__":
    parent_files = ["file1.txt", "file2.txt", "file3.txt"]
    harvester = MetadataHarvester(parent_files)

    # One row per category: the adder to call, a single key/value pair to
    # add first, then a dict of further entries to merge in afterwards.
    demo_entries = (
        (
            harvester.add_sample_info,
            "sample_id",
            "S001",
            {"sample_type": "blood", "sample_volume": "5ml"},
        ),
        (
            harvester.add_environment_info,
            "location",
            "lab_1",
            {"temperature": "22C", "humidity": "50%"},
        ),
        (
            harvester.add_instrument_info,
            "instrument_id",
            "I123",
            {"instrument_type": "microscope", "manufacturer": "XYZ Corp"},
        ),
        (
            harvester.add_data_level_info,
            "processing_step",
            "initial",
            {"data_quality": "high", "data_format": "CSV"},
        ),
    )
    for add_info, single_key, single_value, bulk_fields in demo_entries:
        add_info(single_key, single_value)
        add_info(bulk_fields)

    # Show everything that was collected.
    metadata = harvester.get_metadata()
    print(metadata)

    # Reset the harvester and show that it is empty again.
    harvester.clear_metadata()
    print("Cleared metadata:", harvester.get_metadata())
def main():
output_filename_path = "output_files/unified_file_smog_chamber_2024-03-19_UTC-OFST_+0100_NG.h5"