339 lines
14 KiB
Python
339 lines
14 KiB
Python
import sys
|
|
import os
|
|
root_dir = os.path.abspath(os.curdir)
|
|
sys.path.append(root_dir)
|
|
import subprocess
|
|
|
|
import h5py
|
|
import yaml
|
|
import shutil
|
|
import src.g5505_utils as utils
|
|
import src.hdf5_vis as hdf5_vis
|
|
import input_files.config_file as config_file
|
|
import src.hidden as hidden
|
|
|
|
import numpy as np
|
|
|
|
import pygit2 as pygit
|
|
|
|
|
|
|
|
YAML_EXT = ".yaml"
|
|
TXT_EXT = ".txt"
|
|
|
|
# Handle to the git repository located at the current working directory. It is
# opened when this module is imported and is used by the review workflow
# functions below to create branches, stage files and commit.
repo_obj = pygit.Repository(os.path.abspath(os.curdir))
|
|
|
|
|
|
def get_review_status(filename_path):
    """
    Return the most recent review status recorded for an hdf5 file.

    The status is read from ``review/<name>-review_status<TXT_EXT>`` (relative to
    the current working directory), where ``<name>`` is the base name of
    ``filename_path`` without its extension. Every workflow step appends one
    line to that file, so the last non-blank line is the current status.

    Parameters
    ----------
    filename_path : str
        Path to the hdf5 file under review. Only its base name is used.

    Returns
    -------
    str
        The last non-blank line of the status file, stripped of surrounding
        whitespace (e.g. 'under review' or 'submitted').

    Raises
    ------
    FileNotFoundError
        If the status file does not exist (raised by ``open``).
    ValueError
        If the status file contains no status entry at all.
    """
    filename = os.path.splitext(os.path.basename(filename_path))[0]
    status_file_path = os.path.join("review/", filename + "-review_status" + TXT_EXT)

    with open(status_file_path, 'r') as f:
        # Skip blank lines so that a trailing newline or an accidental empty
        # line is never mistaken for the current status.
        workflow_steps = [line.strip() for line in f if line.strip()]

    if not workflow_steps:
        raise ValueError("The review status file " + status_file_path + " is empty.")

    return workflow_steps[-1]
|
|
|
|
def checkout_review_branch(repo_obj,initials):
    """
    Check out the reviewer's branch, creating it first if it does not exist.

    The branch is named ``'metadata-review-by-' + initials``. A newly created
    branch points at the object obtained by peeling the repository's HEAD.

    Parameters
    ----------
    repo_obj :
        Repository object providing ``head``, ``branches``, ``create_branch``
        and ``checkout`` (the module passes its pygit2 repository handle).
    initials : str
        Reviewer initials appended to the branch name.
    """
    review_branch_name = 'metadata-review-by-' + initials

    # HEAD is resolved up front, regardless of whether the branch already exists.
    start_point = repo_obj.head.peel()

    if review_branch_name in repo_obj.branches:
        review_branch = repo_obj.branches[review_branch_name]
    else:
        review_branch = repo_obj.create_branch(review_branch_name, start_point)

    repo_obj.checkout(review_branch)
|
|
|
|
def first_initialize_metadata_review(filename_path,initials):
    """
    Initialize the metadata review of an hdf5 file.

    Creates (if not already present) a copy of the file's yaml representation in
    the ``review`` folder, writes a status file containing 'under review' next
    to it, checks out the branch ``metadata-review-by-<initials>`` and commits
    the review files on that branch.

    Parameters
    ----------
    filename_path : str
        Path to the hdf5 file under review. A yaml snapshot with the same base
        name must exist in the same folder.
    initials : str
        Reviewer initials, used to name the review branch.

    Raises
    ------
    ValueError
        If ``filename_path`` does not have an h5 extension, or if the yaml
        snapshot of the hdf5 file does not exist.
    """
    filename_path_tail, filename_path_head = os.path.split(filename_path)
    filename, ext = os.path.splitext(filename_path_head)

    # Check file_path points to h5 file
    if 'h5' not in ext:
        raise ValueError("filename_path needs to point to a suitable h5 file.")

    # Verify if yaml snapshot of input h5 file exists
    yaml_snapshot_path = os.path.join(filename_path_tail, filename + YAML_EXT)
    if not os.path.exists(yaml_snapshot_path):
        raise ValueError("metadata review cannot be initialized. The associated .yaml file under review was not found. Run take_yml_snapshot_of_hdf5_file(filename_path) ")

    # Initialize metadata review workflow
    print("Create branch metadata-review-by-"+initials+"\n")

    # Reuse an existing review copy; otherwise copy the yaml snapshot into 'review'.
    review_filename_path = os.path.join("review/", filename + YAML_EXT)
    if not os.path.exists(review_filename_path):
        review_filename_path = utils.make_file_copy(yaml_snapshot_path, 'review')

    review_filename_tail, _ = os.path.splitext(review_filename_path)

    # (Re)start the status log of the review workflow.
    with open(review_filename_tail + "-review_status" + TXT_EXT, 'w') as f:
        f.write('under review')

    checkout_review_branch(repo_obj, initials)

    # Stage every changed path that belongs to the review files of this hdf5 file.
    for filepath in repo_obj.status():
        if 'review/' + filename in filepath:
            repo_obj.index.add(filepath)

    # Persist the staged entries; without this the on-disk index would not
    # match the commit created below.
    repo_obj.index.write()

    author = config_file.author  # default_signature
    committer = config_file.committer
    message = "Initialized metadata review process."
    tree = repo_obj.index.write_tree()
    # `.id` is used instead of the deprecated `.oid` alias.
    repo_obj.create_commit('HEAD', author, committer, message, tree, [repo_obj.head.peel().id])
|
|
|
|
#print("Add and commit"+"\n")
|
|
|
|
|
|
|
|
def second_submit_metadata_review(filename_path, initials):
    """
    Submit the metadata review of an hdf5 file.

    Run this once you are done reviewing the yaml representation of the hdf5
    file in the review folder. The function appends 'submitted' to the review
    status file, checks out the branch ``metadata-review-by-<initials>`` and
    commits the modified review files (.yaml and .txt) on that branch.

    Parameters
    ----------
    filename_path : str
        Path to the hdf5 file under review. Only its base name is used to
        locate the review files.
    initials : str
        Reviewer initials, used to name the review branch.

    Raises
    ------
    ValueError
        If the current review status is neither 'under review' nor 'submitted',
        i.e. the review was not initialized through the expected workflow.
    """
    # 1. Verify review initialization was performed first.
    current_status = get_review_status(filename_path)
    if not any(status in current_status for status in ('under review', 'submitted')):
        raise ValueError("Metadata review must be initialized before it can be submitted. Run first first_initialize_metadata_review().")

    # 2. Record the new status in the review status file.
    filename = os.path.splitext(os.path.basename(filename_path))[0]
    with open(os.path.join("review/", filename + "-review_status" + TXT_EXT), 'a') as f:
        f.write('\nsubmitted')

    # 3. Stage and commit the modified review files on the review branch.
    checkout_review_branch(repo_obj, initials)

    for filepath, file_status in repo_obj.status().items():
        # The status is a combination of bit flags, so test the flag instead of
        # comparing for equality (a file may carry additional flags).
        if ('review/' + filename in filepath) and (file_status & pygit.GIT_STATUS_WT_MODIFIED):
            repo_obj.index.add(filepath)

    # Persist the staged entries so the on-disk index matches the commit.
    repo_obj.index.write()

    author = config_file.author  # default_signature
    committer = config_file.committer
    message = "Submitted metadata review."
    tree = repo_obj.index.write_tree()
    # `.id` is used instead of the deprecated `.oid` alias.
    repo_obj.create_commit('HEAD', author, committer, message, tree, [repo_obj.head.peel().id])
|
|
|
|
|
|
|
|
def third_complete_metadata_review(initials):
    """
    Push the reviewer's branch ``metadata-review-by-<initials>`` to 'origin'.

    The push only happens when the branch exists locally and is the branch
    currently checked out; otherwise an explanatory message is printed and the
    function returns without pushing.

    Parameters
    ----------
    initials : str
        Reviewer initials, used to name the review branch.

    Returns
    -------
    int or None
        Return code of ``git push`` when the push was performed, ``None`` when
        the preconditions were not met.

    Raises
    ------
    subprocess.CalledProcessError
        If any of the git commands exits with a non-zero status.
    """
    repository = 'origin'
    branch_name = 'metadata-review-by-' + initials  # refspec

    branches = subprocess.run(['git', 'branch', '--list'], capture_output=True, text=True, check=True)
    # Each output line starts with a two-character marker ('* ', '+ ' or '  ')
    # followed by the branch name. Compare whole names so that e.g. the
    # initials 'N' do not match an existing branch for 'NG'.
    local_branches = {line[2:].strip() for line in branches.stdout.splitlines()}
    if branch_name not in local_branches:
        print('There is no branch named '+branch_name+'.\n')
        print('Make sure to run metadata reviewer workflow from the beginning without missing any steps.')
        return

    curr_branch = subprocess.run(['git', 'branch', '--show-current'], capture_output=True, text=True, check=True)
    if curr_branch.stdout.strip() != branch_name:
        print('Complete metadata review could not be completed.\n')
        print('Make sure a metadata-reviewer workflow has already been started on branch '+branch_name+'\n')
        print('The step "Complete metadata review" will have no effect.')
        return

    # text=True so the captured output prints as text rather than a bytes repr.
    result = subprocess.run(['git', 'push', repository, branch_name], capture_output=True, text=True, check=True)
    print(result.stdout)

    return result.returncode
|
|
|
|
def _report_attribute_changes(hdf5_obj, review_attrs):
    """Print the object's name and, for shallow groups, how many attributes the review adds or omits."""
    print(hdf5_obj.name)
    if isinstance(hdf5_obj, h5py.Group) and len(hdf5_obj.name.split('/')) <= 4:
        existing_names = set(hdf5_obj.attrs.keys())
        count_additions = sum(name not in existing_names for name in review_attrs)
        count_deletions = sum(name not in review_attrs for name in existing_names)
        print('additions', count_additions, 'deletions', count_deletions)


def _apply_attribute_review(hdf5_obj, review_attrs):
    """Write the reviewed attributes (new values, renames, additions) into ``hdf5_obj.attrs``."""
    for attr_name, attr_value in review_attrs.items():
        if not isinstance(attr_value, dict):
            # Plain value: create the attribute or overwrite its value.
            hdf5_obj.attrs[attr_name] = attr_value
            continue

        # Dict entry: may carry a new name ('rename_as') and/or a new value ('value').
        new_attr_name = attr_value.get('rename_as', attr_name)
        if attr_name in hdf5_obj.attrs.keys():
            # Existing attribute: keep its current value unless the review gives one.
            hdf5_obj.attrs[new_attr_name] = attr_value.get('value', hdf5_obj.attrs[attr_name])
            # Drop the old entry when the review renamed the attribute.
            if new_attr_name != attr_name:
                del hdf5_obj.attrs[attr_name]
        else:
            # New attribute without an explicit value is stored as NaN.
            # TODO: let the user know why np.nan might have been assigned
            hdf5_obj.attrs[new_attr_name] = attr_value.get('value', np.nan)


def _commit_modified_data_files(message):
    """Stage the modified .h5/.yaml files reported by ``git status`` and commit them with ``message``."""
    status = subprocess.run(['git', 'status'], capture_output=True, text=True, check=True)

    files_to_add_list = []
    for line in status.stdout.splitlines():
        # NOTE(review): this parses the human-readable `git status` output and
        # therefore assumes git reports in English.
        if 'modified:' not in line:
            continue
        # Everything after the label is the path; this keeps paths that contain
        # spaces in one piece.
        path = line.split('modified:', 1)[1].strip()
        if path.endswith(('.h5', '.yaml')):
            files_to_add_list.append(path)

    if files_to_add_list:
        subprocess.run(['git', 'add'] + files_to_add_list, capture_output=True, check=True)
        # TODO:verify if files were correctly staged
        subprocess.run(['git', 'commit', '-m', message], capture_output=True, check=True)
    else:
        print("There were no found h5 and yaml files, needing to be saved. This action will not have effect on the review process' commit history.")


def third_update_hdf5_file_with_review(input_hdf5_file, yaml_file):
    """
    Update the attributes of an hdf5 file from its submitted yaml review file.

    For every key of the yaml file (expected to coincide with an object path in
    the hdf5 file) the entries under 'attributes' are written to the object's
    attributes. A plain value sets the attribute; a dict value may contain
    'rename_as' (new attribute name) and/or 'value' (new attribute value).
    Afterwards the yaml snapshot of the hdf5 file is regenerated and the
    modified .h5/.yaml files are committed with git.

    Parameters
    ----------
    input_hdf5_file : str
        Path to the hdf5 file to update. Its review status must be 'submitted'.
    yaml_file : str
        Path to the reviewed yaml file.

    Raises
    ------
    ValueError
        If the review has not been submitted yet, or the yaml file is empty.
    yaml.YAMLError
        If the yaml file cannot be parsed.
    subprocess.CalledProcessError
        If one of the git commands exits with a non-zero status.
    """
    # TODO: compare review file with current yaml file and only access the
    # groups that changed; the approach below visits every key.
    # TODO: only enable update if your branch is data owner :)

    if 'submitted' not in get_review_status(input_hdf5_file):
        raise ValueError('Review yaml file must be submitted before trying to perform an update. Run first second_submit_metadata_review().')

    with open(yaml_file, 'r') as stream:
        try:
            # NOTE(review): FullLoader can construct more than plain data types;
            # consider yaml.safe_load if review files may come from untrusted sources.
            yaml_dict = yaml.load(stream, Loader=yaml.FullLoader)
        except yaml.YAMLError as exc:
            # Report and propagate: continuing without a parsed review would
            # only fail later with an unrelated NameError.
            print(exc)
            raise

    if not yaml_dict:
        raise ValueError('Review yaml file ' + str(yaml_file) + ' is empty. There is nothing to update.')

    with h5py.File(input_hdf5_file, 'r+') as f:
        for key, yaml_obj in yaml_dict.items():  # keys should coincide with group names
            print(key)
            hdf5_obj = f[key]
            review_attrs = yaml_obj['attributes']

            _report_attribute_changes(hdf5_obj, review_attrs)
            _apply_attribute_review(hdf5_obj, review_attrs)

    # Recreate/or update yaml representation of updated input_hdf5_file.
    hdf5_vis.take_yml_snapshot_of_hdf5_file(input_hdf5_file)

    _commit_modified_data_files('Updated hdf5 file with yaml review file.')
|
|
|
|
#with open('review/review_status.txt','r+') as f:
|
|
# f.write('hdf5 file updated w/ metadata review')
|
|
|
|
#try:
|
|
# yaml_dict = yaml.load(stream, Loader=yaml.FullLoader)
|
|
#except yaml.YAMLError as exc:
|
|
# print(exc)
|
|
|
|
|
|
def fourth_complete_metadata_review(initials):
    """
    Push the data owner's branch ``data-owner-review-by-<initials>`` to 'origin'.

    The push only happens when the branch exists locally and is the branch
    currently checked out; otherwise an explanatory message is printed and the
    function returns without pushing.

    Parameters
    ----------
    initials : str
        Data owner initials, used to name the branch.

    Returns
    -------
    bool or None
        ``True`` when the branch was pushed, ``None`` when the preconditions
        were not met.

    Raises
    ------
    subprocess.CalledProcessError
        If any of the git commands exits with a non-zero status.
    """
    repository = 'origin'
    branch_name = 'data-owner-review-by-' + initials

    branches = subprocess.run(['git', 'branch', '--list'], capture_output=True, text=True, check=True)
    # Each output line starts with a two-character marker ('* ', '+ ' or '  ')
    # followed by the branch name. Compare whole names so that e.g. the
    # initials 'N' do not match an existing branch for 'NG'.
    local_branches = {line[2:].strip() for line in branches.stdout.splitlines()}
    if branch_name not in local_branches:
        print('There is no branch named '+branch_name+'.\n')
        print('Make sure to run data owner review workflow from the beginning without missing any steps.')
        return

    curr_branch = subprocess.run(['git', 'branch', '--show-current'], capture_output=True, text=True, check=True)
    if curr_branch.stdout.strip() != branch_name:
        print('Complete metadata review could not be completed.\n')
        print('Make sure a data-owner workflow has already been started on branch '+branch_name+'\n')
        print('The step "Complete metadata review" will have no effect.')
        return

    # push
    result = subprocess.run(['git', 'push', repository, branch_name], capture_output=True, text=True, check=True)
    print(result.stdout)

    # Remaining clean-up steps of the workflow (not implemented yet):
    # 1. git add output_files/
    # 2. delete review/
    # 3. git rm review/
    # 4. git commit -m "Completed review process. Current state of hdf5 file and yml should be up to date."
    return True
|
|
|
|
|
|
#import config_file
|
|
#import hdf5_vis
|
|
|
|
def main():
    """
    Sketch of the metadata review workflow for one example output file.

    Only the example paths are computed; the workflow steps themselves are
    listed as comments and are meant to be enabled one at a time.
    """
    output_filename_path = "output_files/unified_file_smog_chamber_2024-03-19_UTC-OFST_+0100_NG.h5"
    # The yaml snapshot shares the base name of the hdf5 file and uses the
    # '.yaml' extension (the original literal was misspelled '.yalm').
    output_yml_filename_path = os.path.splitext(output_filename_path)[0] + ".yaml"
    output_yml_filename_path_tail, filename = os.path.split(output_yml_filename_path)

    # Workflow steps, in order:
    #output_yml_filename_path = hdf5_vis.take_yml_snapshot_of_hdf5_file(output_filename_path)
    #first_initialize_metadata_review(output_filename_path,initials='NG')
    #second_submit_metadata_review(output_filename_path,initials='NG')
    #if os.path.exists(os.path.join(os.path.join(os.path.abspath(os.curdir),"review"),filename)):
    #    third_update_hdf5_file_with_review(output_filename_path, os.path.join(os.path.join(os.path.abspath(os.curdir),"review"),filename))
    #fourth_complete_metadata_review(initials='NG')
|
|
|
|
#if __name__ == '__main__':
|
|
|
|
# main()
|