Moved git-related operations from pipelines/ to src/git_ops.py
This commit is contained in:
@ -11,176 +11,6 @@ import src.hdf5_ops as hdf5_ops
|
||||
import src.git_ops as git_ops
|
||||
|
||||
|
||||
import numpy as np
|
||||
|
||||
|
||||
|
||||
YAML_EXT = ".yaml"
|
||||
TXT_EXT = ".txt"
|
||||
|
||||
|
||||
|
||||
def get_review_status(filename_path):
    """Return the last line of the review-status file that belongs to a data file.

    The status file is looked up at ``review/<stem>-review_status`` + ``TXT_EXT``,
    where ``<stem>`` is the base name of ``filename_path`` without its extension.

    Parameters
    ----------
    filename_path : str
        Path to the file under review. Only its base name is used.

    Returns
    -------
    str
        The final line of the status file, exactly as stored (a trailing
        newline, if present, is kept).

    Raises
    ------
    IndexError
        If the status file is empty.
    """
    stem = os.path.splitext(os.path.basename(filename_path))[0]
    status_file_path = os.path.join("review/", stem + "-review_status" + TXT_EXT)

    with open(status_file_path, 'r') as status_file:
        recorded_steps = status_file.readlines()

    # Workflow steps are appended over time, so the most recent one is last.
    return recorded_steps[-1]
|
||||
|
||||
def first_initialize_metadata_review(hdf5_file_path, reviewer_attrs, restart=False):
    """Initialize the metadata review of an HDF5 file (first workflow step).

    A copy of the YAML snapshot stored next to the HDF5 file is placed in the
    ``review`` folder, a status file containing ``under review`` is written,
    and the review files reported by ``git status`` are staged and committed
    with the message ``Initialized metadata review.``.

    Parameters
    ----------
    hdf5_file_path : str
        Path to the HDF5 file under review. Its extension must contain ``h5``
        and a YAML file with the same stem must exist in the same directory.
    reviewer_attrs : dict
        Only ``reviewer_attrs['initials']`` is read; the expected branch name
        is ``review_<initials>``.
    restart : bool, optional
        When True, the YAML copy in the review folder is regenerated even if
        one already exists.

    Returns
    -------
    tuple
        ``(review_yaml_file_path, review_status_yaml_file_path)``.

    Raises
    ------
    ValueError
        If the path does not point to an h5 file, if the YAML snapshot is
        missing, or if the review branch name is not found in the output of
        ``git_ops.show_current_branch()``.
    """
    initials = reviewer_attrs['initials']
    branch_name = '_'.join(['review', initials])

    hdf5_file_path_tail, filename_path_head = os.path.split(hdf5_file_path)
    filename, ext = os.path.splitext(filename_path_head)

    # Check file_path points to h5 file
    if 'h5' not in ext:
        raise ValueError("filename_path needs to point to an h5 file.")

    # Verify that the yaml snapshot of the input h5 file exists
    source_yaml_file_path = os.path.join(hdf5_file_path_tail, filename + YAML_EXT)
    if not os.path.exists(source_yaml_file_path):
        raise ValueError("metadata review cannot be initialized. The associated .yaml file under review was not found. Run serialize_metadata(filename_path) ")

    # Check you are working at the right branch
    curr_branch = git_ops.show_current_branch()
    if branch_name not in curr_branch.stdout:
        raise ValueError("Branch "+branch_name+" was not found. \nPlease open a Git Bash Terminal, and follow the below instructions: \n1. Change directory to your project's directory. \n2. Excecute the command: git checkout "+branch_name)

    # Locations of the review copy and of its status file inside review/
    review_yaml_file_path = os.path.join("review/", filename + YAML_EXT)
    review_yaml_file_path_tail = os.path.splitext(review_yaml_file_path)[0]
    review_status_yaml_file_path = os.path.join(review_yaml_file_path_tail + "-review_status" + TXT_EXT)

    if not os.path.exists(review_yaml_file_path) or restart:
        review_yaml_file_path = utils.make_file_copy(source_yaml_file_path, 'review')
        if restart:
            print('metadata review has been reinitialized. The review files will reflect the current state of the hdf5 files metadata')

    # NOTE(review): the status file is assumed to be (re)written on every call,
    # not only when the YAML copy is created - confirm against the original layout.
    with open(review_status_yaml_file_path, 'w') as f:
        f.write('under review')

    # Stage untracked review files and commit them to local repository
    status = git_ops.get_status()
    untracked_files = []
    for raw_line in status.stdout.splitlines():
        # The status output may arrive as bytes or str depending on how
        # git_ops.get_status() invokes git; normalize so both are handled.
        line = raw_line.decode('utf-8') if isinstance(raw_line, bytes) else raw_line
        is_modified = 'modified' in line

        if 'review/' in line:
            if not is_modified:
                # Lines without the 'modified' label are taken as bare paths.
                untracked_files.append(line.strip())
            else:
                # "modified:   <path>" -> keep only the path
                untracked_files.append(line.strip().split()[1])

        if 'output_files/' + filename + YAML_EXT in line and not is_modified:
            untracked_files.append(line.strip())

    if untracked_files:
        # presumably add_files_to_git/commit_changes return the git command
        # (argument list) to execute - verify against src/git_ops.py
        subprocess.run(git_ops.add_files_to_git(untracked_files), capture_output=True, check=True)
        message = 'Initialized metadata review.'
        commit_output = subprocess.run(git_ops.commit_changes(message), capture_output=True, check=True)

        # No text=True above, so stdout is bytes and must be decoded.
        for output_line in commit_output.stdout.splitlines():
            print(output_line.decode('utf-8'))

    return review_yaml_file_path, review_status_yaml_file_path
|
||||
|
||||
|
||||
|
||||
def second_save_metadata_review(review_yaml_file_path, reviewer_attrs):
    """Save a finished metadata review (second workflow step).

    Run this once the YAML representation of the HDF5 file in the review
    folder has been edited. If ``git status`` reports the review YAML file as
    modified, ``submitted`` is appended to its status file and both files are
    staged and committed with the message ``Submitted metadata review.``.
    Otherwise ``Nothing to commit.`` is printed.

    Parameters
    ----------
    review_yaml_file_path : str
        Path to the reviewed YAML file; its base name selects the matching
        ``git status`` entries and the status file name.
    reviewer_attrs : dict
        Only ``reviewer_attrs['initials']`` is read; the expected branch name
        is ``review_<initials>``.

    Raises
    ------
    ValueError
        If the review branch name is not found in the output of
        ``git_ops.show_current_branch()``.
    """
    initials = reviewer_attrs['initials']
    branch_name = '_'.join(['review', initials])

    # Check you are working at the right branch
    curr_branch = git_ops.show_current_branch()
    if branch_name not in curr_branch.stdout:
        raise ValueError('Please checkout ' + branch_name + ' via Git Bash before submitting metadata review files. ')

    # Collect modified review files
    status = git_ops.get_status()
    review_yaml_file_name = os.path.basename(review_yaml_file_path)
    modified_files = []
    for raw_line in status.stdout.splitlines():
        # The status output may arrive as bytes or str depending on how
        # git_ops.get_status() invokes git; normalize so both are handled.
        line = raw_line.decode('utf-8') if isinstance(raw_line, bytes) else raw_line
        if 'modified' in line and 'review/' in line and review_yaml_file_name in line:
            # "modified:   <path>" -> keep only the path
            modified_files.append(line.split()[1])

    if not modified_files:
        print('Nothing to commit.')
        return

    # Record the new workflow step in the status file that accompanies the yaml
    filename = os.path.splitext(review_yaml_file_name)[0]
    review_status_file_path = os.path.join("review/", filename + "-review_status" + TXT_EXT)
    with open(review_status_file_path, 'a') as f:
        f.write('\nsubmitted')
    modified_files.append(review_status_file_path)

    # Stage modified files and commit them to local repository
    # presumably add_files_to_git/commit_changes return the git command
    # (argument list) to execute - verify against src/git_ops.py
    subprocess.run(git_ops.add_files_to_git(modified_files), capture_output=True, check=True)
    message = 'Submitted metadata review.'
    commit_output = subprocess.run(git_ops.commit_changes(message), capture_output=True, check=True)

    # No text=True above, so stdout is bytes and must be decoded.
    for output_line in commit_output.stdout.splitlines():
        print(output_line.decode('utf-8'))
|
||||
|
||||
#
|
||||
def load_yaml(yaml_review_file):
|
||||
with open(yaml_review_file, 'r') as stream:
|
||||
try:
|
||||
@ -263,12 +93,10 @@ def update_hdf5_file_with_review(input_hdf5_file, yaml_review_file):
|
||||
yaml_dict : dict
|
||||
Dictionary specifying objects and their attributes with operations. Example format:
|
||||
{
|
||||
"object_name": {
|
||||
"attr_name": {
|
||||
"value": attr_value,
|
||||
"delete": True/False
|
||||
}
|
||||
}
|
||||
"object_name": { "attributes" : "attr_name": { "value": attr_value,
|
||||
"delete": true | false
|
||||
}
|
||||
}
|
||||
}
|
||||
"""
|
||||
yaml_dict = load_yaml(yaml_review_file)
|
||||
@ -320,13 +148,6 @@ def update_hdf5_file_with_review(input_hdf5_file, yaml_review_file):
|
||||
output_yml_filename_path = hdf5_ops.serialize_metadata(input_hdf5_file)
|
||||
print(f'{output_yml_filename_path} was successfully regenerated from the updated version of{input_hdf5_file}')
|
||||
|
||||
def third_update_hdf5_file_with_review(input_hdf5_file, yaml_review_file, reviewer_attrs=None, hdf5_upload=False):
    """Apply a submitted YAML review to its HDF5 file (third workflow step).

    Parameters
    ----------
    input_hdf5_file : str
        Path to the HDF5 file to update; also used to look up the review status.
    yaml_review_file : str
        Path to the reviewed YAML file passed to ``update_hdf5_file_with_review``.
    reviewer_attrs : dict, optional
        Accepted for interface compatibility; not read by this function.
    hdf5_upload : bool, optional
        Forwarded unchanged to ``git_ops.perform_git_operations``.

    Raises
    ------
    ValueError
        If the latest review status does not contain ``submitted``.
    """
    # None instead of a shared mutable {} default; callers that omit the
    # argument see no difference because the value is never read.
    if reviewer_attrs is None:
        reviewer_attrs = {}

    if 'submitted' not in get_review_status(input_hdf5_file):
        raise ValueError('Review yaml file must be submitted before trying to perform an update. Run first second_save_metadata_review().')

    update_hdf5_file_with_review(input_hdf5_file, yaml_review_file)
    git_ops.perform_git_operations(hdf5_upload)
|
||||
|
||||
def count(hdf5_obj,yml_dict):
|
||||
print(hdf5_obj.name)
|
||||
if isinstance(hdf5_obj,h5py.Group) and len(hdf5_obj.name.split('/')) <= 4:
|
||||
@ -337,144 +158,6 @@ def count(hdf5_obj,yml_dict):
|
||||
count_delections = sum(deletions)
|
||||
print('additions',count_additions, 'deletions', count_delections)
|
||||
|
||||
def last_submit_metadata_review(reviewer_attrs):
    """Push the reviewer's branch to the ``origin`` remote (fourth workflow step).

    The branch ``review_<initials>`` must exist locally and its name must be
    found in the output of ``git_ops.show_current_branch()``; otherwise an
    explanation is printed and nothing is pushed.

    Parameters
    ----------
    reviewer_attrs : dict
        Only ``reviewer_attrs['initials']`` is read.

    Returns
    -------
    int or None
        Return code of ``git push`` when the push was attempted, ``None`` when
        the preconditions were not met.

    Raises
    ------
    subprocess.CalledProcessError
        If ``git branch --list`` or ``git push`` exits with a non-zero status.
    """
    initials = reviewer_attrs['initials']
    repository = 'origin'
    branch_name = '_'.join(['review', initials])

    list_branches_command = ['git', 'branch', '--list']
    branches = subprocess.run(list_branches_command, capture_output=True, text=True, check=True)

    # Each listed branch is preceded by a two-character marker column
    # ("* " for the current branch). Compare whole names so that, e.g.,
    # "review_AB" is not accepted just because "review_ABC" exists.
    local_branches = {line[2:].strip() for line in branches.stdout.splitlines()}
    if branch_name not in local_branches:
        print('There is no branch named '+branch_name+'.\n')
        print('Make sure to run data owner review workflow from the beginning without missing any steps.')
        return

    curr_branch = git_ops.show_current_branch()
    if branch_name not in curr_branch.stdout:
        print('Complete metadata review could not be completed.\n')
        print('Make sure a data-owner workflow has already been started on branch '+branch_name+'\n')
        print('The step "Complete metadata review" will have no effect.')
        return

    # push
    push_command = ['git', 'push', repository, branch_name]
    result = subprocess.run(push_command, capture_output=True, text=True, check=True)
    print(result.stdout)

    # TODO: remaining clean-up steps that are not implemented yet:
    # 1. git add output_files/
    # 2. delete review/
    # 3. git rm review/
    # 4. git commit -m "Completed review process. Current state of hdf5 file and yml should be up to date."
    return result.returncode
|
||||
|
||||
|
||||
#import config_file
|
||||
#import hdf5_ops
|
||||
|
||||
class MetadataHarvester:
    """Collect metadata entries grouped into a fixed set of categories.

    Entries live in ``self.metadata``, a dict with one sub-dict per category
    (project, sample, environment, instruments, datasets), next to the list
    ``self.parent_files``.
    """

    # Category names, in the order they appear in ``self.metadata``.
    _CATEGORIES = ("project", "sample", "environment", "instruments", "datasets")

    def __init__(self, parent_files=None):
        self.parent_files = [] if parent_files is None else parent_files
        self.metadata = {name: {} for name in self._CATEGORIES}

    def add_project_info(self, key_or_dict, value=None, append=False):
        self._add_info("project", key_or_dict, value, append)

    def add_sample_info(self, key_or_dict, value=None, append=False):
        self._add_info("sample", key_or_dict, value, append)

    def add_environment_info(self, key_or_dict, value=None, append=False):
        self._add_info("environment", key_or_dict, value, append)

    def add_instrument_info(self, key_or_dict, value=None, append=False):
        self._add_info("instruments", key_or_dict, value, append)

    def add_dataset_info(self, key_or_dict, value=None, append=False):
        self._add_info("datasets", key_or_dict, value, append)

    def _add_info(self, category, key_or_dict, value, append):
        """Store one entry, or merge a dict of entries, into ``category``.

        A dict argument is merged with ``dict.update``. Otherwise the value is
        stored under the key; when the key already exists and ``append`` is
        true, the new value is combined with the stored one: lists are
        extended, strings are joined with a comma, and any other stored type
        becomes a two-element list.
        """
        section = self.metadata[category]

        if isinstance(key_or_dict, dict):
            section.update(key_or_dict)
            return

        # New key, or overwrite requested: plain assignment.
        if key_or_dict not in section or not append:
            section[key_or_dict] = value
            return

        existing = section[key_or_dict]
        if isinstance(existing, list):
            if isinstance(value, list):
                section[key_or_dict] = existing + value
            else:
                # Grow the stored list in place with the single new value
                existing.append(value)
        elif isinstance(existing, str):
            # Append the new value as a comma-separated string
            section[key_or_dict] = existing + ',' + str(value)
        else:
            # Any other stored type: pair the old and the new value
            section[key_or_dict] = [existing, value]

    def get_metadata(self):
        """Return the parent files and the metadata dict in a single dict."""
        return {
            "parent_files": self.parent_files,
            "metadata": self.metadata,
        }

    def print_metadata(self):
        """Print the parent files followed by every category's entries."""
        print("parent_files", self.parent_files)

        for category, entries in self.metadata.items():
            print(category, 'metadata:\n')
            for name, content in entries.items():
                print(name, content)

    def clear_metadata(self):
        """Reset every category to an empty dict and forget the parent files."""
        self.metadata = {name: {} for name in self._CATEGORIES}
        self.parent_files = []
|
||||
|
||||
def main():
    """Outline the metadata review workflow for one example file.

    Only the example paths are computed; the workflow steps themselves are
    listed as comments and are not executed.
    """
    output_filename_path = "output_files/unified_file_smog_chamber_2024-03-19_UTC-OFST_+0100_NG.h5"
    # The snapshot extension is ".yaml" (see YAML_EXT); the previous literal
    # was misspelled ".yalm" and named a file that is never produced.
    output_yml_filename_path = "output_files/unified_file_smog_chamber_2024-03-19_UTC-OFST_+0100_NG.yaml"
    output_yml_filename_path_tail, filename = os.path.split(output_yml_filename_path)

    # Workflow outline (disabled), using the step functions defined in this module:
    #   output_yml_filename_path = hdf5_ops.serialize_metadata(output_filename_path)
    #   first_initialize_metadata_review(output_filename_path, {'initials': 'NG'})
    #   second_save_metadata_review(os.path.join("review", filename), {'initials': 'NG'})
    #   if os.path.exists(os.path.join(os.path.abspath(os.curdir), "review", filename)):
    #       third_update_hdf5_file_with_review(output_filename_path, os.path.join(os.path.abspath(os.curdir), "review", filename))
    #   last_submit_metadata_review({'initials': 'NG'})
|
||||
|
||||
if __name__ == "__main__":
|
||||
if len(sys.argv) != 3:
|
||||
print("Usage: python pipeline.py run <path/to/descriptor.json>")
|
||||
|
Reference in New Issue
Block a user