# Provenance (recovered from the git patch envelope this code arrived in):
#   Commit:  0fb14b7c6c28ef73f3d80a285d0564469f4dce38
#   Author:  Florez Ospina Juan Felipe
#   Date:    Thu, 13 Jun 2024 15:47:02 +0200
#   Subject: [PATCH] Developed a metadata harvesting object to facilitate
#            metadata collection throughout the code.
#   Target:  src/metadata_review_lib.py, hunk @@ -316,6 +316,82 @@
#            (inserted after last_submit_metadata_review, before main).

#import config_file
#import hdf5_vis


class MetadataHarvester:
    """Accumulate metadata for a group of parent files, organised by category.

    Metadata is kept in four fixed categories: ``"samples"``,
    ``"environment"``, ``"instrument"`` and ``"data_level"``. Each category
    is a plain dict of key/value pairs that is filled through the
    ``add_*_info`` methods.

    Attributes:
        parent_files: List of the parent files the metadata refers to.
        metadata: Dict mapping each category name to its key/value dict.
    """

    # The fixed set of categories every harvester starts with (and is reset
    # to by ``clear_metadata``).
    _CATEGORIES = ("samples", "environment", "instrument", "data_level")

    def __init__(self, parent_files=None):
        """Create a harvester with empty metadata.

        Args:
            parent_files: Optional iterable of parent files. The items are
                copied into a new list, so later changes to the caller's
                container do not affect the harvester. A single bare item
                (e.g. one path string) is wrapped in a one-element list.
                ``None`` means "no parent files".
        """
        self.parent_files = self._as_file_list(parent_files)
        self.metadata = self._empty_metadata()

    @classmethod
    def _empty_metadata(cls):
        """Return a fresh dict holding one empty dict per category."""
        return {category: {} for category in cls._CATEGORIES}

    @staticmethod
    def _as_file_list(parent_files):
        """Normalise the ``parent_files`` argument into a new list."""
        if parent_files is None:
            return []
        # A str/bytes is iterable, but iterating it would split it into
        # single characters; treat it (and any non-iterable, such as a
        # path object) as one file.
        if isinstance(parent_files, (str, bytes)) or not hasattr(
            parent_files, "__iter__"
        ):
            return [parent_files]
        return list(parent_files)

    def add_sample_info(self, key_or_dict, value=None):
        """Add one key/value pair, or a dict of pairs, to ``"samples"``."""
        self._add_info("samples", key_or_dict, value)

    def add_environment_info(self, key_or_dict, value=None):
        """Add one key/value pair, or a dict of pairs, to ``"environment"``."""
        self._add_info("environment", key_or_dict, value)

    def add_instrument_info(self, key_or_dict, value=None):
        """Add one key/value pair, or a dict of pairs, to ``"instrument"``."""
        self._add_info("instrument", key_or_dict, value)

    def add_data_level_info(self, key_or_dict, value=None):
        """Add one key/value pair, or a dict of pairs, to ``"data_level"``."""
        self._add_info("data_level", key_or_dict, value)

    def _add_info(self, category, key_or_dict, value):
        """Internal helper method to add information to a category.

        Args:
            category: Name of an existing category in ``self.metadata``.
            key_or_dict: Either a dict whose items are merged into the
                category (``value`` is ignored in that case), or a single
                key to set.
            value: Value stored under ``key_or_dict`` when it is a single
                key. Existing keys are overwritten.

        Raises:
            KeyError: If ``category`` is not present in ``self.metadata``.
        """
        if isinstance(key_or_dict, dict):
            self.metadata[category].update(key_or_dict)
        else:
            self.metadata[category][key_or_dict] = value

    def get_metadata(self):
        """Return a snapshot of the parent files and collected metadata.

        The returned containers are copies (the list and each category
        dict are duplicated; the stored values themselves are shared), so
        the snapshot is not affected by later ``add_*_info`` calls and
        editing it does not change the harvester.

        Returns:
            A dict with keys ``"parent_files"`` (list) and ``"metadata"``
            (dict of category name -> dict of collected entries).
        """
        return {
            "parent_files": list(self.parent_files),
            "metadata": {
                category: dict(entries)
                for category, entries in self.metadata.items()
            },
        }

    def clear_metadata(self):
        """Reset the harvester: empty every category and drop parent files."""
        self.metadata = self._empty_metadata()
        self.parent_files = []


# Example usage
if __name__ == "__main__":
    parent_files = ["file1.txt", "file2.txt", "file3.txt"]
    harvester = MetadataHarvester(parent_files)

    # Adding sample information
    harvester.add_sample_info("sample_id", "S001")
    harvester.add_sample_info({"sample_type": "blood", "sample_volume": "5ml"})

    # Adding environment information
    harvester.add_environment_info("location", "lab_1")
    harvester.add_environment_info({"temperature": "22C", "humidity": "50%"})

    # Adding instrument information
    harvester.add_instrument_info("instrument_id", "I123")
    harvester.add_instrument_info({"instrument_type": "microscope", "manufacturer": "XYZ Corp"})

    # Adding data level information
    harvester.add_data_level_info("processing_step", "initial")
    harvester.add_data_level_info({"data_quality": "high", "data_format": "CSV"})

    # Retrieving metadata
    metadata = harvester.get_metadata()
    print(metadata)

    # Clear metadata if needed
    harvester.clear_metadata()
    print("Cleared metadata:", harvester.get_metadata())


# Trailing patch context: the opening of the pre-existing main(), reproduced
# unchanged. The remainder of its body is not part of this patch hunk.
def main():
    output_filename_path = "output_files/unified_file_smog_chamber_2024-03-19_UTC-OFST_+0100_NG.h5"