From 37fd603943a29d72392d2a8ce08fba1631b79e61 Mon Sep 17 00:00:00 2001 From: Florez Ospina Juan Felipe Date: Thu, 28 Mar 2024 13:59:47 +0100 Subject: [PATCH] Completed first version of metadata_review_lib.py. Still need to test and correct possible bugs. --- src/metadata_review_lib.py | 91 +++++++++++++++++++++++++++++++++++--- 1 file changed, 86 insertions(+), 5 deletions(-) diff --git a/src/metadata_review_lib.py b/src/metadata_review_lib.py index 2bf7ef3..365ed39 100644 --- a/src/metadata_review_lib.py +++ b/src/metadata_review_lib.py @@ -2,6 +2,7 @@ import sys import os root_dir = os.path.abspath(os.curdir) sys.path.append(root_dir) +import subprocess import h5py import yaml @@ -9,11 +10,14 @@ import shutil import src.g5505_utils as utils import src.hdf5_vis as hdf5_vis import input_files.config_file as config_file +import src.hidden as hidden import numpy as np import pygit2 as pygit + + YAML_EXT = ".yaml" TXT_EXT = ".txt" @@ -142,16 +146,44 @@ def second_submit_metadata_review(filename_path, initials): tree = repo_obj.index.write_tree() oid = repo_obj.create_commit('HEAD', author, committer, message, tree, [repo_obj.head.peel().oid]) - -def third_complete_metadata_review(): - return + +def third_complete_metadata_review(initials): + + push_command = lambda repository,refspec: ['git','push',repository,refspec] + list_branches_command = ['git','branch','--list'] + + repository = 'origin' + branch_name = 'metadata-review-by-'+initials # refspec + + branches = subprocess.run(list_branches_command,capture_output=True,text=True,check=True) + if not branch_name in branches.stdout: + print('There is no branch named '+branch_name+'.\n') + print('Make sure to run metadata reviewer workflow from the beginning without missing any steps.') + return + + current_branch_command = ['git','branch','--show-current'] + curr_branch = subprocess.run(current_branch_command,capture_output=True,text=True,check=True) + + if not branch_name in curr_branch.stdout: + print('Complete metadata review could not be completed.\n') + print('Make sure a metadata-reviewer workflow has already been started on branch '+branch_name+'\n') + print('The step "Complete metadata review" will have no effect.') + return + + result = subprocess.run(push_command(repository,branch_name),capture_output=True,check=True) + + print(result.stdout) + + return result.returncode def third_update_hdf5_file_with_review(input_hdf5_file, yaml_file): # compare review file with current yalm file and then based on the changes open hdf5 file and access only # groups that changed :). the below approach is suboptimal + # TODO: only enable update if your branch is data owner :) + if not 'submitted' in get_review_status(input_hdf5_file): raise ValueError('Review yaml file must be submitted before trying to perform an update. Run first second_submit_metadata_review().') @@ -217,6 +249,30 @@ def third_update_hdf5_file_with_review(input_hdf5_file, yaml_file): # Recreate/or update yaml representation of updated input_hdf5_file. output_yml_filename_path = hdf5_vis.take_yml_snapshot_of_hdf5_file(input_hdf5_file) + + status_command = ['git','status'] + add_command = lambda add_list: ['git','add'] + add_list + commit_command = lambda message: ['git','commit','-m', message] + push_command = lambda repository,refspec: ['git','push',repository,refspec] + + status = subprocess.run(status_command,capture_output=True,check=True) + + files_to_add_list = [] + for line in status.stdout.splitlines(): + # conver line from bytes to str + tmp = line.decode("utf-8") + if 'modified' in tmp: + if any([ext in tmp for ext in ['.h5','.yaml']] ): + files_to_add_list.append(tmp.split()[1]) + if files_to_add_list: + output = subprocess.run(add_command(files_to_add_list),capture_output=True,check=True) + # TODO:verify if files were correctly staged + #status = subprocess.run(status_command,capture_output=True,check=True) + message = 'Updated hdf5 file with yaml review file.' + commit_output = subprocess.run(commit_command(message),capture_output=True,check=True) + else: + print("There were no found h5 and yaml files, needing to be saved. This action will not have effect on the review process' commit history.") + #with open('review/review_status.txt','r+') as f: # f.write('hdf5 file updated w/ metadata review') @@ -226,11 +282,36 @@ def third_update_hdf5_file_with_review(input_hdf5_file, yaml_file): # print(exc) -def fourth_complete_metadata_review(): +def fourth_complete_metadata_review(initials): + + repository = 'origin' + branch_name = 'data-owner-review-by-'+initials + + current_branch_command = ['git','branch','--show-current'] + list_branches_command = ['git','branch','--list'] + + branches = subprocess.run(list_branches_command,capture_output=True,text=True,check=True) + if not branch_name in branches.stdout: + print('There is no branch named '+branch_name+'.\n') + print('Make sure to run data owner review workflow from the beginning without missing any steps.') + return + + curr_branch = subprocess.run(current_branch_command,capture_output=True,text=True,check=True) + if not branch_name in curr_branch.stdout: + print('Complete metadata review could not be completed.\n') + print('Make sure a data-owner workflow has already been started on branch '+branch_name+'\n') + print('The step "Complete metadata review" will have no effect.') + return + + push_command = lambda repository,refspec: ['git','push',repository,refspec] + + # push + result = subprocess.run(push_command(repository,branch_name),capture_output=True,text=True,check=True) + print(result.stdout) # 1. git add output_files/ # 2. delete review/ - shutil.rmtree(os.path.join(os.path.abspath(os.curdir),"review")) + #shutil.rmtree(os.path.join(os.path.abspath(os.curdir),"review")) # 3. git rm review/ # 4. git commit -m "Completed review process. Current state of hdf5 file and yml should be up to date." return True