Implemented metadata review library
src/metadata_review_lib.py (new file, 216 lines)
import sys
import os

root_dir = os.path.abspath(os.curdir)
sys.path.append(root_dir)

import h5py
import yaml
import shutil
import src.g5505_utils as utils
import src.hdf5_vis as hdf5_vis

import numpy as np

import pygit2 as pygit

YAML_EXT = ".yaml"
TXT_EXT = ".txt"

repo_obj = pygit.Repository(os.path.abspath(os.curdir))


def get_review_status(filename_path):

    filename_path_tail, filename_path_head = os.path.split(filename_path)
    filename, ext = os.path.splitext(filename_path_head)

    # Read the review status log and return its most recent (last) entry.
    with open(os.path.join("review/", filename + "-review_status" + TXT_EXT), 'r') as f:
        workflow_steps = []
        for line in f:
            workflow_steps.append(line)
    return workflow_steps[-1]


def first_initialize_metadata_review(filename_path, initials):

    """
    Initialize the metadata review by creating a review folder with a copy of the yaml
    representation of the hdf5 file under review, and a txt file that records the state of the
    review process (e.g., 'under review').
    """

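    # Hypothetical example (the path and initials are placeholders; it assumes a yaml snapshot of
    # the h5 file already exists next to it):
    #
    #   first_initialize_metadata_review('output_files/experiment.h5', initials='NG')
    #
    # copies output_files/experiment.yaml into review/, writes review/experiment-review_status.txt
    # with the entry 'under review', creates and checks out the branch 'metadata-review-by-NG',
    # and commits the staged review files.
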
    filename_path_tail, filename_path_head = os.path.split(filename_path)
    filename, ext = os.path.splitext(filename_path_head)

    # Check that filename_path points to an h5 file
    if 'h5' not in ext:
        raise ValueError("filename_path needs to point to a suitable h5 file.")

    # Verify that a yaml snapshot of the input h5 file exists
    if not os.path.exists(os.path.join(filename_path_tail, filename + YAML_EXT)):
        raise ValueError("Metadata review cannot be initialized. The associated .yaml file under review was not found. Run hdf5_vis.take_yml_snapshot_of_hdf5_file(filename_path) first.")

    # Initialize the metadata review workflow
    print("Create branch metadata-review-by-" + initials + "\n")

    if not os.path.exists(os.path.join("review/", filename + YAML_EXT)):
        review_filename_path = utils.make_file_copy(os.path.join(filename_path_tail, filename + YAML_EXT), 'review')
    else:
        raise Warning("The file " + os.path.join("review/", filename + YAML_EXT) + " already exists. Delete this file to reinitialize the metadata review process.")

    review_filename_tail, ext = os.path.splitext(review_filename_path)

    with open(review_filename_tail + "-review_status" + TXT_EXT, 'w') as f:
        f.write('under review')

    # Create a new review branch (if it does not exist yet) and check it out
    branch_name = 'metadata-review-by-' + initials
    head_commit = repo_obj.head.peel()  # Get the commit associated with HEAD

    if branch_name not in repo_obj.branches:
        new_branch = repo_obj.create_branch(branch_name, head_commit)
        repo_obj.checkout(new_branch)

    # Identify entries associated with review files and stage them
    status_dict = repo_obj.status()
    for key in status_dict:
        if review_filename_tail in key:
            # Stage changes
            repo_obj.index.add(key)
    repo_obj.index.write()

    # pygit2's create_commit needs a reference, signatures, a tree and a parent list, not just a
    # message; use the repository's configured user as author and committer.
    author = repo_obj.default_signature
    tree = repo_obj.index.write_tree()
    repo_obj.create_commit('HEAD', author, author, "Initialized metadata review process.", tree, [repo_obj.head.target])

    #print("Add and commit"+"\n")


def second_submit_metadata_review(filename_path):
    """
    Once you are done reviewing the yaml representation of the hdf5 file in the review folder,
    run this function to change the review status to 'submitted' and record (add and commit) the
    modified .yaml and .txt files in the project.
    """
    # 1. Verify that review initialization was performed first
    # 2. Change the review status in the txt file to 'submitted'
    # 3. git add review/ and git commit -m "Submitted metadata review" (not yet implemented)

    if any([status in get_review_status(filename_path) for status in ['under review', 'submitted']]):
        filename_path_tail, filename_path_head = os.path.split(filename_path)
        filename, ext = os.path.splitext(filename_path_head)
        # Append the new workflow state to the review status log
        with open(os.path.join("review/", filename + "-review_status" + TXT_EXT), 'a') as f:
            f.write('\nsubmitted')
        #return True


def third_update_hdf5_file_with_review(input_hdf5_file, yaml_file):

    # Compare the review yaml file with the current yaml file and, based on the changes, open the
    # hdf5 file and access only the groups that changed. The approach below is suboptimal because
    # it visits every group listed in the yaml file.

    if 'submitted' not in get_review_status(input_hdf5_file):
        raise ValueError('The review yaml file must be submitted before trying to perform an update. Run second_submit_metadata_review() first.')

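    # Sketch of the review yaml structure this function consumes (group and attribute names below
    # are hypothetical; the 'attributes' mapping and the optional 'rename_as'/'value' keys are what
    # the update loop further down actually reads):
    #
    #   /Group1:
    #     attributes:
    #       temperature: 25.0        # plain value: overwritten or added as-is
    #       old_name:
    #         rename_as: new_name    # dict value: rename the attribute ...
    #         value: 42              # ... and optionally assign it a new value
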
    def count(name, obj, yml_dict):
        # Report, for each reviewed group, how many attributes the yaml review adds or deletes
        print(obj.name)
        if isinstance(obj, h5py.Group) and len(obj.name.split('/')) <= 4:
            obj_review = yml_dict[obj.name]

            additions = [not (item in obj.attrs.keys()) for item in obj_review['attributes'].keys()]
            count_additions = sum(additions)

            deletions = [not (item in obj_review['attributes'].keys()) for item in obj.attrs.keys()]
            count_deletions = sum(deletions)

            print('additions', count_additions, 'deletions', count_deletions)

    with open(yaml_file, 'r') as stream:
        try:
            yaml_dict = yaml.load(stream, Loader=yaml.FullLoader)
        except yaml.YAMLError as exc:
            print(exc)

    with h5py.File(input_hdf5_file, 'r+') as f:
        #f.visititems(lambda name, obj: count(name, obj, yaml_dict))
        for key in yaml_dict.keys():  # keys should coincide with group names
            print(key)
            # Select the hdf5 and yaml objects at key
            hdf5_obj = f[key]
            yaml_obj = yaml_dict[key]

            count(hdf5_obj.name, hdf5_obj, yaml_dict)

            for attr_name, attr_value in yaml_obj['attributes'].items():

                # Check whether attr_name already exists among the attributes of hdf5_obj
                if attr_name in hdf5_obj.attrs.keys():  # attribute update, possibly with renaming and a new value
                    if isinstance(attr_value, dict):
                        # Retrieve the possibly new attribute name and value
                        new_attr_name = attr_value.get('rename_as', attr_name)  # if 'rename_as' is present, use its value; otherwise keep the existing name
                        new_attr_value = attr_value.get('value', hdf5_obj.attrs[attr_name])

                        hdf5_obj.attrs[new_attr_name] = new_attr_value

                        # Remove the attribute named attr_name from hdf5_obj.attrs if
                        # the yaml indicates a renaming of the attribute.
                        if not (new_attr_name == attr_name):
                            del hdf5_obj.attrs[attr_name]
                    else:
                        hdf5_obj.attrs[attr_name] = attr_value
                else:  # attribute inclusion
                    if isinstance(attr_value, dict):
                        # Retrieve the new attribute's name and value
                        new_attr_name = attr_value.get('rename_as', attr_name)
                        new_attr_value = attr_value.get('value', np.nan)  # TODO: let the user know why np.nan might have been assigned
                        hdf5_obj.attrs[new_attr_name] = new_attr_value
                    else:
                        hdf5_obj.attrs[attr_name] = attr_value

    # Recreate or update the yaml representation of the updated input_hdf5_file.
    output_yml_filename_path = hdf5_vis.take_yml_snapshot_of_hdf5_file(input_hdf5_file)

    #with open('review/review_status.txt','r+') as f:
    #    f.write('hdf5 file updated w/ metadata review')


def fourth_complete_metadata_review():

    # 1. git add output_files/
    # 2. Delete review/ (currently the only step implemented below)
    shutil.rmtree(os.path.join(os.path.abspath(os.curdir), "review"))
    # 3. git rm review/
    # 4. git commit -m "Completed review process. Current state of hdf5 file and yml should be up to date."
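    # A minimal sketch of steps 1, 3 and 4 with pygit2 (not wired in yet; assumes the repository
    # has user.name/user.email configured so repo_obj.default_signature works):
    #
    #   repo_obj.index.add_all(['output_files/'])   # step 1: stage updated outputs
    #   repo_obj.index.remove_all(['review/*'])     # step 3: stage the deleted review files
    #   repo_obj.index.write()
    #   author = repo_obj.default_signature
    #   tree = repo_obj.index.write_tree()
    #   repo_obj.create_commit('HEAD', author, author,
    #                          "Completed review process. Current state of hdf5 file and yml should be up to date.",
    #                          tree, [repo_obj.head.target])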
    return True


#import config_file
#import hdf5_vis

def main():

    output_filename_path = "output_files/unified_file_smog_chamber_2024-03-19_UTC-OFST_+0100_NG.h5"
    output_yml_filename_path = "output_files/unified_file_smog_chamber_2024-03-19_UTC-OFST_+0100_NG.yaml"
    output_yml_filename_path_tail, filename = os.path.split(output_yml_filename_path)
    #output_yml_filename_path = hdf5_vis.take_yml_snapshot_of_hdf5_file(output_filename_path)

    #first_initialize_metadata_review(output_filename_path, initials='NG')
    #second_submit_metadata_review(output_filename_path)
    #if os.path.exists(os.path.join(os.path.join(os.path.abspath(os.curdir), "review"), filename)):
    #    third_update_hdf5_file_with_review(output_filename_path, os.path.join(os.path.join(os.path.abspath(os.curdir), "review"), filename))
    #fourth_complete_metadata_review()

#if __name__ == '__main__':

#    main()