Completed first version of metadata_review_lib.py. Still need to test and correct possible bugs.

This commit is contained in:
2024-03-28 13:59:47 +01:00
parent f0af30f7e8
commit 37fd603943

View File

@ -2,6 +2,7 @@ import sys
import os
root_dir = os.path.abspath(os.curdir)
sys.path.append(root_dir)
import subprocess
import h5py
import yaml
@ -9,11 +10,14 @@ import shutil
import src.g5505_utils as utils
import src.hdf5_vis as hdf5_vis
import input_files.config_file as config_file
import src.hidden as hidden
import numpy as np
import pygit2 as pygit
YAML_EXT = ".yaml"
TXT_EXT = ".txt"
@ -142,16 +146,44 @@ def second_submit_metadata_review(filename_path, initials):
tree = repo_obj.index.write_tree()
oid = repo_obj.create_commit('HEAD', author, committer, message, tree, [repo_obj.head.peel().oid])
def third_complete_metadata_review():
return
def third_complete_metadata_review(initials):
    """Push the reviewer's branch 'metadata-review-by-<initials>' to 'origin'.

    The push is only attempted when that branch exists locally and is the
    branch currently checked out. Otherwise a hint is printed and nothing
    is pushed.

    Parameters
    ----------
    initials : str
        Reviewer initials, appended to 'metadata-review-by-' to form the
        branch name (also used as the push refspec).

    Returns
    -------
    int or None
        The return code of ``git push`` when the push was executed, or
        ``None`` when one of the precondition checks failed.

    Raises
    ------
    subprocess.CalledProcessError
        If any of the git commands exits with a non-zero status
        (all calls use ``check=True``).
    """
    repository = 'origin'
    branch_name = 'metadata-review-by-' + initials  # refspec

    def run_git(*args):
        # text=True so stdout is str everywhere (the push output used to be
        # printed as a bytes repr).
        return subprocess.run(['git', *args], capture_output=True, text=True, check=True)

    # `git branch --list` prints one branch per line behind a two-character
    # marker ('* ' for the checked-out branch, '  ' otherwise). Compare whole
    # names: a substring test would accept 'metadata-review-by-ab' when only
    # 'metadata-review-by-abc' exists.
    listing = run_git('branch', '--list').stdout
    local_branches = {line[2:].strip() for line in listing.splitlines()}
    if branch_name not in local_branches:
        print('There is no branch named '+branch_name+'.\n')
        print('Make sure to run metadata reviewer workflow from the beginning without missing any steps.')
        return

    # `git branch --show-current` prints just the current branch name
    # (empty on a detached HEAD), so an exact comparison is sufficient.
    current_branch = run_git('branch', '--show-current').stdout.strip()
    if current_branch != branch_name:
        print('Complete metadata review could not be completed.\n')
        print('Make sure a metadata-reviewer workflow has already been started on branch '+branch_name+'\n')
        print('The step "Complete metadata review" will have no effect.')
        return

    result = run_git('push', repository, branch_name)
    print(result.stdout)
    return result.returncode
def third_update_hdf5_file_with_review(input_hdf5_file, yaml_file):
# Compare the review file with the current yaml file and then, based on the changes, open the hdf5 file and access only
# the groups that changed :). The approach below is suboptimal.
# TODO: only enable update if your branch is data owner :)
if not 'submitted' in get_review_status(input_hdf5_file):
raise ValueError('Review yaml file must be submitted before trying to perform an update. Run first second_submit_metadata_review().')
@ -217,6 +249,30 @@ def third_update_hdf5_file_with_review(input_hdf5_file, yaml_file):
# Recreate/or update yaml representation of updated input_hdf5_file.
output_yml_filename_path = hdf5_vis.take_yml_snapshot_of_hdf5_file(input_hdf5_file)
status_command = ['git','status']
add_command = lambda add_list: ['git','add'] + add_list
commit_command = lambda message: ['git','commit','-m', message]
push_command = lambda repository,refspec: ['git','push',repository,refspec]
status = subprocess.run(status_command,capture_output=True,check=True)
files_to_add_list = []
for line in status.stdout.splitlines():
# convert line from bytes to str
tmp = line.decode("utf-8")
if 'modified' in tmp:
if any([ext in tmp for ext in ['.h5','.yaml']] ):
files_to_add_list.append(tmp.split()[1])
if files_to_add_list:
output = subprocess.run(add_command(files_to_add_list),capture_output=True,check=True)
# TODO:verify if files were correctly staged
#status = subprocess.run(status_command,capture_output=True,check=True)
message = 'Updated hdf5 file with yaml review file.'
commit_output = subprocess.run(commit_command(message),capture_output=True,check=True)
else:
print("There were no found h5 and yaml files, needing to be saved. This action will not have effect on the review process' commit history.")
#with open('review/review_status.txt','r+') as f:
# f.write('hdf5 file updated w/ metadata review')
@ -226,11 +282,36 @@ def third_update_hdf5_file_with_review(input_hdf5_file, yaml_file):
# print(exc)
def fourth_complete_metadata_review():
def fourth_complete_metadata_review(initials):
"""Push the branch 'data-owner-review-by-<initials>' to the 'origin' remote.

Prints a hint and returns None when the branch name is not found in the
output of `git branch --list`, or is not found in the output of
`git branch --show-current`. Otherwise runs `git push origin <branch>`,
prints its stdout and returns True.

All git calls use check=True, so a failing git command raises
subprocess.CalledProcessError.
"""
repository = 'origin'
# Branch name doubles as the refspec passed to `git push` below.
branch_name = 'data-owner-review-by-'+initials
current_branch_command = ['git','branch','--show-current']
list_branches_command = ['git','branch','--list']
branches = subprocess.run(list_branches_command,capture_output=True,text=True,check=True)
# NOTE(review): this is a substring test on the raw command output, so a
# branch whose name merely contains branch_name also passes - confirm
# whether an exact name match is intended.
if not branch_name in branches.stdout:
print('There is no branch named '+branch_name+'.\n')
print('Make sure to run data owner review workflow from the beginning without missing any steps.')
return
curr_branch = subprocess.run(current_branch_command,capture_output=True,text=True,check=True)
# Same substring test, this time against the currently checked-out branch.
if not branch_name in curr_branch.stdout:
print('Complete metadata review could not be completed.\n')
print('Make sure a data-owner workflow has already been started on branch '+branch_name+'\n')
print('The step "Complete metadata review" will have no effect.')
return
push_command = lambda repository,refspec: ['git','push',repository,refspec]
# push
result = subprocess.run(push_command(repository,branch_name),capture_output=True,text=True,check=True)
print(result.stdout)
# The numbered steps below are a plan kept as comments; apart from the
# rmtree line they are not implemented here.
# 1. git add output_files/
# 2. delete review/
# NOTE(review): the next two lines are the same call, once live and once
# commented out - they look like the removed/added pair of a diff. Confirm
# whether deleting the review/ directory is still intended.
shutil.rmtree(os.path.join(os.path.abspath(os.curdir),"review"))
#shutil.rmtree(os.path.join(os.path.abspath(os.curdir),"review"))
# 3. git rm review/
# 4. git commit -m "Completed review process. Current state of hdf5 file and yml should be up to date."
return True