Moved git-related operations from pipelines/ to src/git_ops.py
This commit is contained in:
@ -11,176 +11,6 @@ import src.hdf5_ops as hdf5_ops
|
|||||||
import src.git_ops as git_ops
|
import src.git_ops as git_ops
|
||||||
|
|
||||||
|
|
||||||
import numpy as np
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# Extension appended to a file's base name to locate its YAML metadata snapshot.
YAML_EXT = ".yaml"
|
|
||||||
# Extension of the plain-text "<name>-review_status" file kept in the review folder.
TXT_EXT = ".txt"
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def get_review_status(filename_path):
    """Return the most recent entry of the review-status log of a file.

    The log is read from ``review/<stem>-review_status.txt``, where ``<stem>``
    is the base name of ``filename_path`` without its extension.

    Parameters
    ----------
    filename_path : str
        Path of the file under review; only its base name is used.

    Returns
    -------
    str
        The last non-blank line of the log exactly as read (a trailing newline,
        if any, is kept). An empty string when the log has no non-blank line,
        so callers testing ``'submitted' in status`` get a clean negative
        instead of an IndexError.

    Raises
    ------
    FileNotFoundError
        If the status log does not exist (raised by ``open``).
    """
    filename, _ = os.path.splitext(os.path.basename(filename_path))
    status_file_path = os.path.join("review/", filename + "-review_status" + TXT_EXT)

    last_entry = ''
    with open(status_file_path, 'r') as f:
        for line in f:
            # Skip blank lines so a trailing newline cannot hide the real status.
            if line.strip():
                last_entry = line
    return last_entry
|
|
||||||
|
|
||||||
def first_initialize_metadata_review(hdf5_file_path, reviewer_attrs, restart = False):
    """Initialize the metadata review of an HDF5 file (first workflow step).

    Copies the YAML snapshot of the HDF5 file into the ``review`` folder,
    (re)writes the review status file with ``under review`` and stages and
    commits the review files reported by ``git status``.

    Parameters
    ----------
    hdf5_file_path : str
        Path to the HDF5 file under review. A YAML snapshot with the same base
        name must exist in the same directory.
    reviewer_attrs : dict
        Reviewer attributes; ``reviewer_attrs['initials']`` selects the
        expected branch ``review_<initials>``.
    restart : bool, optional
        When True the review copy is made again even if one already exists.

    Returns
    -------
    tuple
        ``(review_yaml_file_path, review_status_yaml_file_path)``.

    Raises
    ------
    ValueError
        If the path does not have an h5 extension, if the YAML snapshot is
        missing, or if the expected review branch is not the current branch.
    """
    initials = reviewer_attrs['initials']
    branch_name = '_'.join(['review', initials])

    hdf5_file_path_tail, filename_path_head = os.path.split(hdf5_file_path)
    filename, ext = os.path.splitext(filename_path_head)

    # Check file_path points to h5 file
    if 'h5' not in ext:
        raise ValueError("filename_path needs to point to an h5 file.")

    # Verify if yaml snapshot of input h5 file exists
    yaml_snapshot_path = os.path.join(hdf5_file_path_tail, filename + YAML_EXT)
    if not os.path.exists(yaml_snapshot_path):
        raise ValueError("metadata review cannot be initialized. The associated .yaml file under review was not found. Run serialize_metadata(filename_path) ")

    # Check you are working at the right branch
    curr_branch = git_ops.show_current_branch()
    if branch_name not in curr_branch.stdout:
        raise ValueError("Branch "+branch_name+" was not found. \nPlease open a Git Bash Terminal, and follow the below instructions: \n1. Change directory to your project's directory. \n2. Excecute the command: git checkout "+branch_name)

    review_yaml_file_path = os.path.join("review/", filename + YAML_EXT)
    review_yaml_file_path_tail, ext = os.path.splitext(review_yaml_file_path)
    review_status_yaml_file_path = os.path.join(review_yaml_file_path_tail + "-review_status" + ".txt")

    if not os.path.exists(review_yaml_file_path) or restart:
        review_yaml_file_path = utils.make_file_copy(yaml_snapshot_path, 'review')
        if restart:
            print('metadata review has been reinitialized. The review files will reflect the current state of the hdf5 files metadata')

    # The status file is rewritten on every call, so the status restarts at
    # 'under review' even when the review copy already existed.
    with open(review_status_yaml_file_path, 'w') as f:
        f.write('under review')

    # Stage untracked review files and commit them to local repository
    status = git_ops.get_status()
    untracked_files = []
    for raw_line in status.stdout.splitlines():
        # git output is bytes unless the command ran in text mode; accept both
        # so this parser agrees with second_save_metadata_review.
        line = raw_line.decode("utf-8") if isinstance(raw_line, bytes) else raw_line

        if 'review/' in line:
            if 'modified' not in line:
                # Untracked entry: the stripped line is the path itself.
                untracked_files.append(line.strip())
            else:
                # "modified:   <path>" -> keep only the path.
                untracked_files.append(line.strip().split()[1])

        if 'output_files/' + filename + YAML_EXT in line and 'modified' not in line:
            untracked_files.append(line.strip())

    if untracked_files:
        # NOTE(review): add_files_to_git / commit_changes presumably build the
        # git command lines that are executed here - confirm in git_ops.
        subprocess.run(git_ops.add_files_to_git(untracked_files), capture_output=True, check=True)
        message = 'Initialized metadata review.'
        commit_output = subprocess.run(git_ops.commit_changes(message), capture_output=True, check=True)

        # Commit output is captured as bytes (no text mode), hence the decode.
        for line in commit_output.stdout.splitlines():
            print(line.decode('utf-8'))

    return review_yaml_file_path, review_status_yaml_file_path
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def second_save_metadata_review(review_yaml_file_path, reviewer_attrs):
    """Save a finished metadata review (second workflow step).

    Once the YAML file in the review folder has been edited, this function
    appends ``submitted`` to the review status file and stages and commits the
    modified YAML file together with the status file. Nothing is written or
    committed when ``git status`` reports no modification of the review file.

    Parameters
    ----------
    review_yaml_file_path : str
        Path to the reviewed YAML file inside the review folder.
    reviewer_attrs : dict
        Reviewer attributes; ``reviewer_attrs['initials']`` selects the
        expected branch ``review_<initials>``.

    Raises
    ------
    ValueError
        If the expected review branch is not the current branch.
    """
    initials = reviewer_attrs['initials']
    branch_name = '_'.join(['review', initials])

    # Check you are working at the right branch
    curr_branch = git_ops.show_current_branch()
    if branch_name not in curr_branch.stdout:
        raise ValueError('Please checkout ' + branch_name + ' via Git Bash before submitting metadata review files. ')

    # Collect modified review files
    review_yaml_file_name = os.path.basename(review_yaml_file_path)
    status = git_ops.get_status()
    modified_files = []
    for raw_line in status.stdout.splitlines():
        # git output is bytes unless the command ran in text mode; accept both
        # so this parser agrees with first_initialize_metadata_review.
        tmp = raw_line.decode("utf-8") if isinstance(raw_line, bytes) else raw_line
        if 'modified' in tmp and 'review/' in tmp and review_yaml_file_name in tmp:
            # "modified:   <path>" -> keep only the path.
            modified_files.append(tmp.split()[1])

    # Stage modified files and commit them to local repository
    filename, _ = os.path.splitext(review_yaml_file_name)
    if modified_files:
        review_status_file_path = os.path.join("review/", filename + "-review_status" + TXT_EXT)
        with open(review_status_file_path, 'a') as f:
            f.write('\nsubmitted')

        modified_files.append(review_status_file_path)

        # NOTE(review): add_files_to_git / commit_changes presumably build the
        # git command lines that are executed here - confirm in git_ops.
        subprocess.run(git_ops.add_files_to_git(modified_files), capture_output=True, check=True)
        message = 'Submitted metadata review.'
        commit_output = subprocess.run(git_ops.commit_changes(message), capture_output=True, check=True)

        # Commit output is captured as bytes (no text mode), hence the decode.
        for line in commit_output.stdout.splitlines():
            print(line.decode('utf-8'))
    else:
        print('Nothing to commit.')
|
|
||||||
|
|
||||||
#
|
|
||||||
def load_yaml(yaml_review_file):
|
def load_yaml(yaml_review_file):
|
||||||
with open(yaml_review_file, 'r') as stream:
|
with open(yaml_review_file, 'r') as stream:
|
||||||
try:
|
try:
|
||||||
@ -263,10 +93,8 @@ def update_hdf5_file_with_review(input_hdf5_file, yaml_review_file):
|
|||||||
yaml_dict : dict
|
yaml_dict : dict
|
||||||
Dictionary specifying objects and their attributes with operations. Example format:
|
Dictionary specifying objects and their attributes with operations. Example format:
|
||||||
{
|
{
|
||||||
"object_name": {
|
"object_name": { "attributes" : "attr_name": { "value": attr_value,
|
||||||
"attr_name": {
|
"delete": true | false
|
||||||
"value": attr_value,
|
|
||||||
"delete": True/False
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -320,13 +148,6 @@ def update_hdf5_file_with_review(input_hdf5_file, yaml_review_file):
|
|||||||
output_yml_filename_path = hdf5_ops.serialize_metadata(input_hdf5_file)
|
output_yml_filename_path = hdf5_ops.serialize_metadata(input_hdf5_file)
|
||||||
print(f'{output_yml_filename_path} was successfully regenerated from the updated version of{input_hdf5_file}')
|
print(f'{output_yml_filename_path} was successfully regenerated from the updated version of{input_hdf5_file}')
|
||||||
|
|
||||||
def third_update_hdf5_file_with_review(input_hdf5_file, yaml_review_file, reviewer_attrs=None, hdf5_upload=False):
    """Apply a submitted YAML review to the HDF5 file (third workflow step).

    Parameters
    ----------
    input_hdf5_file : str
        Path to the HDF5 file to update; also used to locate its review status.
    yaml_review_file : str
        Path to the reviewed YAML file passed to ``update_hdf5_file_with_review``.
    reviewer_attrs : dict, optional
        Accepted for symmetry with the other workflow steps; not used here.
    hdf5_upload : bool, optional
        Forwarded unchanged to ``git_ops.perform_git_operations``.

    Raises
    ------
    ValueError
        If the latest review status does not contain ``submitted``.
    """
    if 'submitted' not in get_review_status(input_hdf5_file):
        # The message names the function that actually performs the submission.
        raise ValueError('Review yaml file must be submitted before trying to perform an update. Run first second_save_metadata_review().')

    update_hdf5_file_with_review(input_hdf5_file, yaml_review_file)
    git_ops.perform_git_operations(hdf5_upload)
|
|
||||||
|
|
||||||
def count(hdf5_obj,yml_dict):
|
def count(hdf5_obj,yml_dict):
|
||||||
print(hdf5_obj.name)
|
print(hdf5_obj.name)
|
||||||
if isinstance(hdf5_obj,h5py.Group) and len(hdf5_obj.name.split('/')) <= 4:
|
if isinstance(hdf5_obj,h5py.Group) and len(hdf5_obj.name.split('/')) <= 4:
|
||||||
@ -337,144 +158,6 @@ def count(hdf5_obj,yml_dict):
|
|||||||
count_delections = sum(deletions)
|
count_delections = sum(deletions)
|
||||||
print('additions',count_additions, 'deletions', count_delections)
|
print('additions',count_additions, 'deletions', count_delections)
|
||||||
|
|
||||||
def last_submit_metadata_review(reviewer_attrs):
    """Push the reviewer's branch to ``origin`` (last workflow step).

    The branch ``review_<initials>`` must exist locally and be the current
    branch; otherwise an explanation is printed and None is returned.

    Parameters
    ----------
    reviewer_attrs : dict
        Reviewer attributes; ``reviewer_attrs['initials']`` selects the branch.

    Returns
    -------
    int or None
        Return code of ``git push`` when the push was attempted, else None.
    """
    branch_name = '_'.join(('review', reviewer_attrs['initials']))
    remote = 'origin'

    # The review branch has to exist locally.
    local_branches = subprocess.run(
        ['git', 'branch', '--list'],
        capture_output=True,
        text=True,
        check=True,
    )
    if branch_name not in local_branches.stdout:
        print(f'There is no branch named {branch_name}.\n')
        print('Make sure to run data owner review workflow from the beginning without missing any steps.')
        return

    # ...and it has to be the branch that is currently checked out.
    active_branch = git_ops.show_current_branch()
    if branch_name not in active_branch.stdout:
        print('Complete metadata review could not be completed.\n')
        print(f'Make sure a data-owner workflow has already been started on branch {branch_name}\n')
        print('The step "Complete metadata review" will have no effect.')
        return

    # Publish the branch and echo what git reported.
    push_result = subprocess.run(
        ['git', 'push', remote, branch_name],
        capture_output=True,
        text=True,
        check=True,
    )
    print(push_result.stdout)
    return push_result.returncode
|
|
||||||
|
|
||||||
|
|
||||||
#import config_file
|
|
||||||
#import hdf5_ops
|
|
||||||
|
|
||||||
class MetadataHarvester:
    """Accumulate metadata entries grouped by category.

    ``metadata`` maps each of the categories "project", "sample",
    "environment", "instruments" and "datasets" to a dict of entries, and
    ``parent_files`` holds the list given at construction.
    """

    def __init__(self, parent_files=None):
        """Start with empty categories and the given (or an empty) file list."""
        self.parent_files = [] if parent_files is None else parent_files
        self.metadata = {
            name: {}
            for name in ("project", "sample", "environment", "instruments", "datasets")
        }

    def add_project_info(self, key_or_dict, value=None, append=False):
        """Add an entry (or a dict of entries) to the "project" category."""
        self._add_info("project", key_or_dict, value, append)

    def add_sample_info(self, key_or_dict, value=None, append=False):
        """Add an entry (or a dict of entries) to the "sample" category."""
        self._add_info("sample", key_or_dict, value, append)

    def add_environment_info(self, key_or_dict, value=None, append=False):
        """Add an entry (or a dict of entries) to the "environment" category."""
        self._add_info("environment", key_or_dict, value, append)

    def add_instrument_info(self, key_or_dict, value=None, append=False):
        """Add an entry (or a dict of entries) to the "instruments" category."""
        self._add_info("instruments", key_or_dict, value, append)

    def add_dataset_info(self, key_or_dict, value=None, append=False):
        """Add an entry (or a dict of entries) to the "datasets" category."""
        self._add_info("datasets", key_or_dict, value, append)

    def _add_info(self, category, key_or_dict, value, append):
        """Store ``value`` under ``key_or_dict`` in ``category``.

        A dict argument is merged into the category. Otherwise the value
        replaces any existing one unless ``append`` is true and the key exists:
        lists are extended, strings are joined with a comma, and any other
        existing value becomes a two-element list.
        """
        entries = self.metadata[category]

        if isinstance(key_or_dict, dict):
            entries.update(key_or_dict)
            return

        if key_or_dict not in entries or not append:
            entries[key_or_dict] = value
            return

        existing = entries[key_or_dict]
        if isinstance(existing, list):
            if isinstance(value, list):
                entries[key_or_dict] = existing + value
            else:
                # Single values are appended to the stored list in place.
                existing.append(value)
        elif isinstance(existing, str):
            entries[key_or_dict] = existing + ',' + str(value)
        else:
            entries[key_or_dict] = [existing, value]

    def get_metadata(self):
        """Return a dict with the "parent_files" list and the "metadata" dict."""
        return {"parent_files": self.parent_files, "metadata": self.metadata}

    def print_metadata(self):
        """Print the parent files, then every category with its entries."""
        print("parent_files", self.parent_files)

        for category, entries in self.metadata.items():
            print(category, 'metadata:\n')
            for name, content in entries.items():
                print(name, content)

    def clear_metadata(self):
        """Reset every category to empty and replace the file list with a new empty one."""
        self.metadata = {
            name: {}
            for name in ("project", "sample", "environment", "instruments", "datasets")
        }
        self.parent_files = []
|
|
||||||
|
|
||||||
def main():
    """Build the example HDF5/YAML paths of the review workflow.

    Every workflow call (initialize, submit, update, complete) is currently
    disabled, so this function only computes local values and returns None.
    """
    hdf5_example_path = "output_files/unified_file_smog_chamber_2024-03-19_UTC-OFST_+0100_NG.h5"
    # NOTE(review): the extension below reads ".yalm" while YAML_EXT is ".yaml" - confirm.
    yaml_example_path = "output_files/unified_file_smog_chamber_2024-03-19_UTC-OFST_+0100_NG.yalm"
    # Separate the directory from the file name of the YAML path.
    yaml_example_dir, yaml_example_name = os.path.split(yaml_example_path)
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
if len(sys.argv) != 3:
|
if len(sys.argv) != 3:
|
||||||
print("Usage: python pipeline.py run <path/to/descriptor.json>")
|
print("Usage: python pipeline.py run <path/to/descriptor.json>")
|
||||||
|
317
src/git_ops.py
317
src/git_ops.py
@ -1,5 +1,7 @@
|
|||||||
import subprocess
|
import subprocess
|
||||||
|
import os
|
||||||
|
import utils.g5505_utils as utils
|
||||||
|
from pipelines.metadata_revision import update_hdf5_file_with_review
|
||||||
|
|
||||||
def perform_git_operations(hdf5_upload):
|
def perform_git_operations(hdf5_upload):
|
||||||
status_command = ['git', 'status']
|
status_command = ['git', 'status']
|
||||||
@ -41,3 +43,316 @@ def get_status():
|
|||||||
def show_current_branch():
    """Run ``git branch --show-current`` and return the completed process.

    Returns
    -------
    subprocess.CompletedProcess
        Result of the command run in text mode with captured output; its
        ``stdout`` attribute holds what git printed.

    Raises
    ------
    subprocess.CalledProcessError
        If git exits with a non-zero status (``check=True``).
    """
    current_branch_command = ['git','branch','--show-current']
    # The result has to be returned: callers read ``.stdout`` from it.
    return subprocess.run(current_branch_command,capture_output=True,text=True,check=True)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# Extension appended to a file's base name to locate its YAML metadata snapshot.
YAML_EXT = ".yaml"
|
||||||
|
# Extension of the plain-text "<name>-review_status" file kept in the review folder.
TXT_EXT = ".txt"
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def get_review_status(filename_path):
    """Return the most recent entry of the review-status log of a file.

    The log is read from ``review/<stem>-review_status.txt``, where ``<stem>``
    is the base name of ``filename_path`` without its extension.

    Parameters
    ----------
    filename_path : str
        Path of the file under review; only its base name is used.

    Returns
    -------
    str
        The last non-blank line of the log exactly as read (a trailing newline,
        if any, is kept). An empty string when the log has no non-blank line,
        so callers testing ``'submitted' in status`` get a clean negative
        instead of an IndexError.

    Raises
    ------
    FileNotFoundError
        If the status log does not exist (raised by ``open``).
    """
    filename, _ = os.path.splitext(os.path.basename(filename_path))
    status_file_path = os.path.join("review/", filename + "-review_status" + TXT_EXT)

    last_entry = ''
    with open(status_file_path, 'r') as f:
        for line in f:
            # Skip blank lines so a trailing newline cannot hide the real status.
            if line.strip():
                last_entry = line
    return last_entry
|
||||||
|
|
||||||
|
def first_initialize_metadata_review(hdf5_file_path, reviewer_attrs, restart = False):
    """Initialize the metadata review of an HDF5 file (first workflow step).

    Copies the YAML snapshot of the HDF5 file into the ``review`` folder,
    (re)writes the review status file with ``under review`` and stages and
    commits the review files reported by ``git status``.

    Parameters
    ----------
    hdf5_file_path : str
        Path to the HDF5 file under review. A YAML snapshot with the same base
        name must exist in the same directory.
    reviewer_attrs : dict
        Reviewer attributes; ``reviewer_attrs['initials']`` selects the
        expected branch ``review_<initials>``.
    restart : bool, optional
        When True the review copy is made again even if one already exists.

    Returns
    -------
    tuple
        ``(review_yaml_file_path, review_status_yaml_file_path)``.

    Raises
    ------
    ValueError
        If the path does not have an h5 extension, if the YAML snapshot is
        missing, or if the expected review branch is not the current branch.
    """
    initials = reviewer_attrs['initials']
    branch_name = '_'.join(['review', initials])

    hdf5_file_path_tail, filename_path_head = os.path.split(hdf5_file_path)
    filename, ext = os.path.splitext(filename_path_head)

    # Check file_path points to h5 file
    if 'h5' not in ext:
        raise ValueError("filename_path needs to point to an h5 file.")

    # Verify if yaml snapshot of input h5 file exists
    yaml_snapshot_path = os.path.join(hdf5_file_path_tail, filename + YAML_EXT)
    if not os.path.exists(yaml_snapshot_path):
        raise ValueError("metadata review cannot be initialized. The associated .yaml file under review was not found. Run serialize_metadata(filename_path) ")

    # Check you are working at the right branch
    curr_branch = show_current_branch()
    if branch_name not in curr_branch.stdout:
        raise ValueError("Branch "+branch_name+" was not found. \nPlease open a Git Bash Terminal, and follow the below instructions: \n1. Change directory to your project's directory. \n2. Excecute the command: git checkout "+branch_name)

    review_yaml_file_path = os.path.join("review/", filename + YAML_EXT)
    review_yaml_file_path_tail, ext = os.path.splitext(review_yaml_file_path)
    review_status_yaml_file_path = os.path.join(review_yaml_file_path_tail + "-review_status" + ".txt")

    if not os.path.exists(review_yaml_file_path) or restart:
        review_yaml_file_path = utils.make_file_copy(yaml_snapshot_path, 'review')
        if restart:
            print('metadata review has been reinitialized. The review files will reflect the current state of the hdf5 files metadata')

    # The status file is rewritten on every call, so the status restarts at
    # 'under review' even when the review copy already existed.
    with open(review_status_yaml_file_path, 'w') as f:
        f.write('under review')

    # Stage untracked review files and commit them to local repository
    status = get_status()
    untracked_files = []
    for raw_line in status.stdout.splitlines():
        # git output is bytes unless the command ran in text mode; accept both
        # so this parser agrees with second_save_metadata_review.
        line = raw_line.decode("utf-8") if isinstance(raw_line, bytes) else raw_line

        if 'review/' in line:
            if 'modified' not in line:
                # Untracked entry: the stripped line is the path itself.
                untracked_files.append(line.strip())
            else:
                # "modified:   <path>" -> keep only the path.
                untracked_files.append(line.strip().split()[1])

        if 'output_files/' + filename + YAML_EXT in line and 'modified' not in line:
            untracked_files.append(line.strip())

    if untracked_files:
        # NOTE(review): add_files_to_git / commit_changes presumably build the
        # git command lines that are executed here - confirm their definitions.
        subprocess.run(add_files_to_git(untracked_files), capture_output=True, check=True)
        message = 'Initialized metadata review.'
        commit_output = subprocess.run(commit_changes(message), capture_output=True, check=True)

        # Commit output is captured as bytes (no text mode), hence the decode.
        for line in commit_output.stdout.splitlines():
            print(line.decode('utf-8'))

    return review_yaml_file_path, review_status_yaml_file_path
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def second_save_metadata_review(review_yaml_file_path, reviewer_attrs):
    """Save a finished metadata review (second workflow step).

    Once the YAML file in the review folder has been edited, this function
    appends ``submitted`` to the review status file and stages and commits the
    modified YAML file together with the status file. Nothing is written or
    committed when ``git status`` reports no modification of the review file.

    Parameters
    ----------
    review_yaml_file_path : str
        Path to the reviewed YAML file inside the review folder.
    reviewer_attrs : dict
        Reviewer attributes; ``reviewer_attrs['initials']`` selects the
        expected branch ``review_<initials>``.

    Raises
    ------
    ValueError
        If the expected review branch is not the current branch.
    """
    initials = reviewer_attrs['initials']
    branch_name = '_'.join(['review', initials])

    # Check you are working at the right branch
    curr_branch = show_current_branch()
    if branch_name not in curr_branch.stdout:
        raise ValueError('Please checkout ' + branch_name + ' via Git Bash before submitting metadata review files. ')

    # Collect modified review files
    review_yaml_file_name = os.path.basename(review_yaml_file_path)
    status = get_status()
    modified_files = []
    for raw_line in status.stdout.splitlines():
        # git output is bytes unless the command ran in text mode; accept both
        # so this parser agrees with first_initialize_metadata_review.
        tmp = raw_line.decode("utf-8") if isinstance(raw_line, bytes) else raw_line
        if 'modified' in tmp and 'review/' in tmp and review_yaml_file_name in tmp:
            # "modified:   <path>" -> keep only the path.
            modified_files.append(tmp.split()[1])

    # Stage modified files and commit them to local repository
    filename, _ = os.path.splitext(review_yaml_file_name)
    if modified_files:
        review_status_file_path = os.path.join("review/", filename + "-review_status" + TXT_EXT)
        with open(review_status_file_path, 'a') as f:
            f.write('\nsubmitted')

        modified_files.append(review_status_file_path)

        # NOTE(review): add_files_to_git / commit_changes presumably build the
        # git command lines that are executed here - confirm their definitions.
        subprocess.run(add_files_to_git(modified_files), capture_output=True, check=True)
        message = 'Submitted metadata review.'
        commit_output = subprocess.run(commit_changes(message), capture_output=True, check=True)

        # Commit output is captured as bytes (no text mode), hence the decode.
        for line in commit_output.stdout.splitlines():
            print(line.decode('utf-8'))
    else:
        print('Nothing to commit.')
|
||||||
|
|
||||||
|
#
|
||||||
|
def third_update_hdf5_file_with_review(input_hdf5_file, yaml_review_file, reviewer_attrs=None, hdf5_upload=False):
    """Apply a submitted YAML review to the HDF5 file (third workflow step).

    Parameters
    ----------
    input_hdf5_file : str
        Path to the HDF5 file to update; also used to locate its review status.
    yaml_review_file : str
        Path to the reviewed YAML file passed to ``update_hdf5_file_with_review``.
    reviewer_attrs : dict, optional
        Accepted for symmetry with the other workflow steps; not used here.
    hdf5_upload : bool, optional
        Forwarded unchanged to ``perform_git_operations``.

    Raises
    ------
    ValueError
        If the latest review status does not contain ``submitted``.
    """
    if 'submitted' not in get_review_status(input_hdf5_file):
        # The message names the function that actually performs the submission.
        raise ValueError('Review yaml file must be submitted before trying to perform an update. Run first second_save_metadata_review().')

    update_hdf5_file_with_review(input_hdf5_file, yaml_review_file)
    perform_git_operations(hdf5_upload)
|
||||||
|
|
||||||
|
def last_submit_metadata_review(reviewer_attrs):
    """Push the reviewer's branch to ``origin`` (last workflow step).

    The branch ``review_<initials>`` must exist locally and be the current
    branch; otherwise an explanation is printed and None is returned.

    Parameters
    ----------
    reviewer_attrs : dict
        Reviewer attributes; ``reviewer_attrs['initials']`` selects the branch.

    Returns
    -------
    int or None
        Return code of ``git push`` when the push was attempted, else None.
    """
    branch_name = '_'.join(('review', reviewer_attrs['initials']))
    remote = 'origin'

    # The review branch has to exist locally.
    local_branches = subprocess.run(
        ['git', 'branch', '--list'],
        capture_output=True,
        text=True,
        check=True,
    )
    if branch_name not in local_branches.stdout:
        print(f'There is no branch named {branch_name}.\n')
        print('Make sure to run data owner review workflow from the beginning without missing any steps.')
        return

    # ...and it has to be the branch that is currently checked out.
    active_branch = show_current_branch()
    if branch_name not in active_branch.stdout:
        print('Complete metadata review could not be completed.\n')
        print(f'Make sure a data-owner workflow has already been started on branch {branch_name}\n')
        print('The step "Complete metadata review" will have no effect.')
        return

    # Publish the branch and echo what git reported.
    push_result = subprocess.run(
        ['git', 'push', remote, branch_name],
        capture_output=True,
        text=True,
        check=True,
    )
    print(push_result.stdout)
    return push_result.returncode
|
||||||
|
|
||||||
|
|
||||||
|
#import config_file
|
||||||
|
#import hdf5_ops
|
||||||
|
|
||||||
|
class MetadataHarvester:
    """Accumulate metadata entries grouped by category.

    ``metadata`` maps each of the categories "project", "sample",
    "environment", "instruments" and "datasets" to a dict of entries, and
    ``parent_files`` holds the list given at construction.
    """

    def __init__(self, parent_files=None):
        """Start with empty categories and the given (or an empty) file list."""
        self.parent_files = [] if parent_files is None else parent_files
        self.metadata = {
            name: {}
            for name in ("project", "sample", "environment", "instruments", "datasets")
        }

    def add_project_info(self, key_or_dict, value=None, append=False):
        """Add an entry (or a dict of entries) to the "project" category."""
        self._add_info("project", key_or_dict, value, append)

    def add_sample_info(self, key_or_dict, value=None, append=False):
        """Add an entry (or a dict of entries) to the "sample" category."""
        self._add_info("sample", key_or_dict, value, append)

    def add_environment_info(self, key_or_dict, value=None, append=False):
        """Add an entry (or a dict of entries) to the "environment" category."""
        self._add_info("environment", key_or_dict, value, append)

    def add_instrument_info(self, key_or_dict, value=None, append=False):
        """Add an entry (or a dict of entries) to the "instruments" category."""
        self._add_info("instruments", key_or_dict, value, append)

    def add_dataset_info(self, key_or_dict, value=None, append=False):
        """Add an entry (or a dict of entries) to the "datasets" category."""
        self._add_info("datasets", key_or_dict, value, append)

    def _add_info(self, category, key_or_dict, value, append):
        """Store ``value`` under ``key_or_dict`` in ``category``.

        A dict argument is merged into the category. Otherwise the value
        replaces any existing one unless ``append`` is true and the key exists:
        lists are extended, strings are joined with a comma, and any other
        existing value becomes a two-element list.
        """
        entries = self.metadata[category]

        if isinstance(key_or_dict, dict):
            entries.update(key_or_dict)
            return

        if key_or_dict not in entries or not append:
            entries[key_or_dict] = value
            return

        existing = entries[key_or_dict]
        if isinstance(existing, list):
            if isinstance(value, list):
                entries[key_or_dict] = existing + value
            else:
                # Single values are appended to the stored list in place.
                existing.append(value)
        elif isinstance(existing, str):
            entries[key_or_dict] = existing + ',' + str(value)
        else:
            entries[key_or_dict] = [existing, value]

    def get_metadata(self):
        """Return a dict with the "parent_files" list and the "metadata" dict."""
        return {"parent_files": self.parent_files, "metadata": self.metadata}

    def print_metadata(self):
        """Print the parent files, then every category with its entries."""
        print("parent_files", self.parent_files)

        for category, entries in self.metadata.items():
            print(category, 'metadata:\n')
            for name, content in entries.items():
                print(name, content)

    def clear_metadata(self):
        """Reset every category to empty and replace the file list with a new empty one."""
        self.metadata = {
            name: {}
            for name in ("project", "sample", "environment", "instruments", "datasets")
        }
        self.parent_files = []
|
||||||
|
|
||||||
|
def main():
    """Build the example HDF5/YAML paths of the review workflow.

    Every workflow call (initialize, submit, update, complete) is currently
    disabled, so this function only computes local values and returns None.
    """
    hdf5_example_path = "output_files/unified_file_smog_chamber_2024-03-19_UTC-OFST_+0100_NG.h5"
    # NOTE(review): the extension below reads ".yalm" while YAML_EXT is ".yaml" - confirm.
    yaml_example_path = "output_files/unified_file_smog_chamber_2024-03-19_UTC-OFST_+0100_NG.yalm"
    # Separate the directory from the file name of the YAML path.
    yaml_example_dir, yaml_example_name = os.path.split(yaml_example_path)
|
||||||
|
Reference in New Issue
Block a user