diff --git a/.gitignore b/.gitignore index 5de2c72..6909c5d 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,5 @@ *.pyc __pycache__/ *.h5 -tmp_files/ \ No newline at end of file +tmp_files/ +*.ipynb \ No newline at end of file diff --git a/input_files/config_file.py b/input_files/config_file.py index 7acd38e..6531151 100644 --- a/input_files/config_file.py +++ b/input_files/config_file.py @@ -14,85 +14,32 @@ import pygit2 as pygit author = pygit.Signature('Florez Ospina Juan Felipe', 'juan.florez-ospina@psi.ch') committer = pygit.Signature('Florez Ospina Juan Felipe', 'juan.florez-ospina@psi.ch') -group_id = 'smog_chamber' - -#group_id = '5505' output_filename_tempate = lambda group_id, timestamp,user_initials : '_'.join(['unified_file',group_id,timestamp,user_initials])+'.h5' +def created_at(): -now = datetime.now() -# Populate now object with time zone infotmation obtained from the local system -now_tz_aware = now.astimezone() -tz = now_tz_aware.strftime('%z') -#created_at = now_tz_aware.strftime('%Y-%m-%d_%H-%M-%S')+'_utcoffset_' + tz -created_at = now_tz_aware.strftime('%Y-%m-%d')+'_UTC-OFST_' + tz -# Make created at timestamp with tz information -#created_at = now.isoformat() - -usecase = 1 - -if usecase == 1: - - group_id == 'smog_chamber' - user_initials = 'NG' - #from smog_chamber_file_reader import read_txt_files_as_dict - #from g5505_file_reader import copy_file_in_group - #select_dir_keywords = ['htof','ams', 'ptr', 'gas','smps'] - inputfile_dir = '\\\\fs03\\Iron_Sulphate' - inputfile_dir = '//fs03/Iron_Sulphate' - #select_dir_keywords = ['gas','smps\\20220726','htof\\2022.07.26','ptr\\2022.07.26','ams\\2022.07.26'] - select_dir_keywords = ['gas','smps/20220726']#,'htof/2022.07.26','ptr/2022.07.26','ams/2022.07.26'] - select_file_keywords = ['20220726','2022.07.26'] - - outputfile_dir = 'output_files' - - output_filename = output_filename_tempate(group_id,created_at,user_initials) #'test_smog_chamber_v14.h5' - #output_filename = 'unified_file_smog_chamber_2024-03-19_UTC-OFST_+0100_NG.h5' + now = datetime.now() + # Populate now object with time zone infotmation obtained from the local system + now_tz_aware = now.astimezone() + tz = now_tz_aware.strftime('%z') + #created_at = now_tz_aware.strftime('%Y-%m-%d_%H-%M-%S')+'_utcoffset_' + tz + created_at = now_tz_aware.strftime('%Y-%m-%d')+'_UTC-OFST_' + tz + # Make created at timestamp with tz information + #created_at = now.isoformat() + return created_at - ext_to_reader_dict = {'.txt': scf_reader.read_txt_files_as_dict, +def select_file_readers(group_id): + if group_id == '5505': + ext_to_reader_dict = {'.ibw': g5505f_reader.read_xps_ibw_file_as_dict, + '.txt': g5505f_reader.read_txt_files_as_dict, + '.dat': g5505f_reader.read_txt_files_as_dict, + '.h5': g5505f_reader.copy_file_in_group} + elif group_id == 'smog_chamber': + ext_to_reader_dict = {'.txt': scf_reader.read_txt_files_as_dict, '.TXT': scf_reader.read_txt_files_as_dict, '.h5': g5505f_reader.copy_file_in_group} - -elif usecase == 2 : - group_id == '5505' - user_initials = 'TBR' - outputfile_dir = 'output_files' - #output_filename = 'test_sls_data_v8.h5' - inputfile_dir = '//fs101/5505/People/Juan/TypicalBeamTime' - select_file_keywords=[] - select_dir_keywords = ['NEXAFS', 'Notes', 'Photos', 'Pressure', 'RGA', 'SES'] - - output_filename = output_filename_tempate(group_id,created_at,user_initials) - #output_filename = 'unified_file_5505_2024-03-19_UTC-OFST_+0100_TBR.h5' - - ext_to_reader_dict = {'.ibw': g5505f_reader.read_xps_ibw_file_as_dict, - '.txt': 
g5505f_reader.read_txt_files_as_dict, - '.dat': g5505f_reader.read_txt_files_as_dict, - '.h5': g5505f_reader.copy_file_in_group} -elif usecase == 3: - user_initials = 'LL' - outputfile_dir = 'output_files' - output_filename = output_filename_tempate(group_id,created_at,user_initials) - - inputfile_dir = '//fs101/5505/Data' - - #select_dir_keywords = ['Lopap', 'Humidity_Sensors', 'ICAD/HONO', 'ICAD/NO2', 'T200_NOX', 'T360U_CO2'] - # TODO: make sure in the code composite keywords are broken down into single keywords - - ##select_dir_keywords = ['Humidity_Sensors','ICAD/HONO','ICAD/NO2'] - select_dir_keywords = ['Humidity_Sensors/2022','ICAD/HONO/2022','ICAD/NO2/2022', '2022/01_Jan', '2022/02_Feb', '2022/03_März'] - - dates = pd.read_excel(os.path.abspath(os.path.join('input_files','date_experiments_for Juan.xlsx'))) - - select_file_keywords=[item.strftime('%Y-%m-%d') for item in dates.loc[0:2,'experiment_date']] - select_file_keywords= select_file_keywords + [item.strftime('%Y%m%d') for item in dates.loc[0:2,'experiment_date']] - - ext_to_reader_dict = {'.ibw': g5505f_reader.read_xps_ibw_file_as_dict, - '.txt': g5505f_reader.read_txt_files_as_dict, - '.dat': g5505f_reader.read_txt_files_as_dict, - '.h5': g5505f_reader.copy_file_in_group} - + return ext_to_reader_dict diff --git a/src/g5505_file_reader.py b/src/g5505_file_reader.py index 97abb1a..f069e63 100644 --- a/src/g5505_file_reader.py +++ b/src/g5505_file_reader.py @@ -93,10 +93,12 @@ def read_txt_files_as_dict(filename : str ): file_encoding = 'latin-1' elif 'ICAD' in filename and 'HONO' in filename: table_header = 'Start Date/Time (UTC) Duration (s) NO2 (ppb) NO2 Uncertainty (ppb) H2O (ppb) H2O Uncertainty (ppb) CHOCHO (ppb) CHOCHO Uncertainty (ppb) File Number Light Intensity #ICEDOAS iter. Cell Pressure Ambient Pressure Cell Temp Spec Temp Lat Lon Height Speed GPSQuality 0-Air Ref. Time 0-Air Ref. Duration 0-Air Ref. File Number 0-Air Ref. Intensity 0-Air Ref. Rel Intensity 0-Air Ref. Intensity valid MeasMode SampleSource' - separator = '\t' + separator = '\t' + file_encoding = 'latin-1' elif 'ICAD' in filename and 'NO2' in filename: table_header = 'Start Date/Time (UTC) Duration (s) NO2 (ppb) NO2 Uncertainty (ppb) H2O (ppb) H2O Uncertainty (ppb) CHOCHO (ppb) CHOCHO Uncertainty (ppb) File Number Light Intensity #ICEDOAS iter. Cell Pressure Ambient Pressure Cell Temp Spec Temp Lat Lon Height Speed GPSQuality 0-Air Ref. Time 0-Air Ref. Duration 0-Air Ref. File Number 0-Air Ref. Intensity 0-Air Ref. Rel Intensity 0-Air Ref. 
Intensity valid MeasMode SampleSource' separator = '\t' + file_encoding = 'latin-1' else: return {} #raise ValueError('intrument_folder must be set as a either "RGA" or "Pressure"') @@ -107,33 +109,32 @@ def read_txt_files_as_dict(filename : str ): # Work with copy of the file for safety tmp_filename = utils.make_file_copy(source_file_path=filename) - with open(tmp_filename,'r',encoding=file_encoding,errors='ignore') as f: - #file_encoding = f.encoding - #table_preamble = "" + #with open(tmp_filename,'rb',encoding=file_encoding,errors='ignore') as f: + with open(tmp_filename,'rb') as f: table_preamble = [] for line_number, line in enumerate(f): - if table_header in line: - list_of_substrings = line.split(separator) + if table_header in line.decode(file_encoding): + list_of_substrings = line.decode(file_encoding).split(separator) data_start = True column_names = [] for i, name in enumerate(list_of_substrings): column_names.append(str(i)+'_'+name) - print(line_number, len(column_names )) + #print(line_number, len(column_names ),'\n') break # Subdivide line into words, and join them by single space. # I asumme this can produce a cleaner line that contains no weird separator characters \t \r or extra spaces and so on. - list_of_substrings = line.split() + list_of_substrings = line.decode(file_encoding).split() # TODO: ideally we should use a multilinear string but the yalm parser is not recognizing \n as special character #line = ' '.join(list_of_substrings+['\n']) - line = ' '.join(list_of_substrings) - table_preamble.append(line)# += new_line + #line = ' '.join(list_of_substrings) + table_preamble.append(' '.join(list_of_substrings))# += new_line header_dict["table_preamble"] = table_preamble - # TODO: it does not work with separater as none :(. fix for RGA + # TODO: it does not work with separator as none :(. fix for RGA try: df = pd.read_csv(tmp_filename, delimiter = separator, diff --git a/src/hdf5_lib.py b/src/hdf5_lib.py index 7126179..386d5a4 100644 --- a/src/hdf5_lib.py +++ b/src/hdf5_lib.py @@ -17,6 +17,14 @@ import h5py import yaml +def progressBar(count_value, total, suffix=''): + bar_length = 100 + filled_up_Length = int(round(bar_length* count_value / float(total))) + percentage = round(100.0 * count_value/float(total),1) + bar = '=' * filled_up_Length + '-' * (bar_length - filled_up_Length) + sys.stdout.write('[%s] %s%s ...%s\r' %(bar, percentage, '%', suffix)) + sys.stdout.flush() + def read_mtable_as_dataframe(filename): """ Reconstruct a Matlab Table encoded in a .h5 file as a Pandas DataFrame. The input .h5 file @@ -150,10 +158,11 @@ def get_parent_child_relationships(file: h5py.File): parent = [''] #values = [file.attrs['count']] # TODO: maybe we should make this more general and not dependent on file_list attribute? 
- if 'file_list' in file.attrs.keys(): - values = [len(file.attrs['file_list'])] - else: - values = [1] + #if 'file_list' in file.attrs.keys(): + # values = [len(file.attrs['file_list'])] + #else: + # values = [1] + values = [len(file.keys())] def node_visitor(name,obj): #if isinstance(obj,h5py.Group): @@ -161,10 +170,12 @@ def get_parent_child_relationships(file: h5py.File): parent.append(obj.parent.name) #nodes.append(os.path.split(obj.name)[1]) #parent.append(os.path.split(obj.parent.name)[1]) - if isinstance(obj,h5py.Dataset) or not 'file_list' in obj.attrs.keys(): + + if isinstance(obj,h5py.Dataset):# or not 'file_list' in obj.attrs.keys(): values.append(1) else: - values.append(len(obj.attrs['file_list'])) + values.append(len(obj.keys())) + #values.append(len(obj.attrs['file_list'])) file.visititems(node_visitor) return nodes, parent, values @@ -204,15 +215,20 @@ def annotate_root_dir(filename,annotation_dict: dict): import shutil -def create_hdf5_file_from_filesystem_path(ofilename : str, +def create_hdf5_file_from_filesystem_path(config_param : dict , input_file_system_path : str, select_dir_keywords = [], select_file_keywords =[], top_sub_dir_mask : bool = True): +#def create_hdf5_file_from_filesystem_path(output_filename : str, +# input_file_system_path : str, +# select_dir_keywords = [], +# select_file_keywords =[], +# top_sub_dir_mask : bool = True): """ - Creates an .h5 file with name ofilename that preserves the directory tree (or folder structure) of given a filesystem path and - a few file and directory keywords. The keywords enable filtering of directories and files that do not contain the specified keywords. + Creates an .h5 file with name "output_filename" that preserves the directory tree (or folder structure) of a given filesystem path. + When file and directory keywords are non-empty, they are used to filter out directories and files that do not contain the specified keywords. In the .h5 file, only files that are admissible file formats will be stored in the form of datasets and attributes. @@ -237,35 +253,74 @@ def create_hdf5_file_from_filesystem_path(ofilename : str, # Ensure OS compliant paths and keywords + # TODO: validate config_param dict, make sure output_filename is a valid file_path + group_id = config_param['group_id'] + user_initials = config_param['user_initials'] + created_at = config_file.created_at() + output_dir = config_param['output_dir'] + output_filename = output_dir + config_file.output_filename_tempate(group_id,created_at,user_initials) + + admissible_file_ext_list = list(config_file.select_file_readers(group_id).keys()) + if '/' in input_file_system_path: input_file_system_path = input_file_system_path.replace('/',os.sep) else: raise ValueError('input_file_system_path needs to be specified using forward slashes "/".' ) for i, keyword in enumerate(select_dir_keywords): - select_dir_keywords[i] = keyword.replace('/',os.sep) + select_dir_keywords[i] = keyword.replace('/',os.sep) + # Visit each subdirectory from top to bottom, root directory defined by input_file_system_path to the lower + # level directories. - with h5py.File(ofilename, 'w') as h5file: + # Constrain walkable paths on the specified directory tree by allowing walks that start from root + # through subdirectories specified by dir_keywords. This improves efficiency, especially in deep + # directory trees with many leaves. 
+ paths = [] + if top_sub_dir_mask: + for item in os.listdir(input_file_system_path): + if any([item in keyword for keyword in select_dir_keywords]): + paths.append(os.path.join(input_file_system_path,item)) + else: + paths.append(input_file_system_path) - # Visit each subdirectory from top to bottom, root directory defined by input_file_sytem_path to the lower - # level directories. - - # Constrain walkable paths on the specified directory tree by allowing walks that start from root - # through subdirectories specified by dir_keywords. This improves efficiency especially, in deep - # directory trees with many leaves. - paths = [] - if top_sub_dir_mask: - for item in os.listdir(input_file_system_path): - if any([item in keyword for keyword in select_dir_keywords]): - paths.append(os.path.join(input_file_system_path,item)) - else: - paths.append(input_file_system_path) + with h5py.File(output_filename, 'w') as h5file: for item in paths: root_dir = input_file_system_path + # Create dictionary with directory-files pairs where files satisfy keyword and admissible type constraints + # It requires an extra pass over the directory tree and additional memory for the dictionary, but it may be useful + # to speed up the subsequent step and prune the resulting directory tree. + + # For each directory and/or subdirectory, keep files that satisfy file_keyword constraints, and store + # (directory_path, suitable files) relationships in a dictionary. + file_paths_dict = {} + + check_file_ext = lambda filename: any([ext in filename for ext in admissible_file_ext_list]) + + for dirpath, _, filenames in os.walk(item,topdown=False): + file_paths_dict[dirpath] = [] + + # Check files that have an admissible extension and store them in admissible_filenames list + admissible_filenames = [] + for fn in filenames: + if check_file_ext(fn): + admissible_filenames.append(fn) + + if select_file_keywords: # when select_file_keywords = [], all files are considered + for filename in admissible_filenames: + # Do not consider files with types for which there is still no file_reader. TODO: extend file_reader library. + #if not any([ext in filename for ext in admissible_file_ext_list]): + # continue + + # Add files whose names contain any of the file_keywords + if any([keyword in filename for keyword in select_file_keywords]): + file_paths_dict[dirpath].append(filename) + else: + file_paths_dict[dirpath] = admissible_filenames + for node_number, node in enumerate(os.walk(item, topdown=True)): dirpath, dirnames, filenames_list = node @@ -277,26 +332,24 @@ def create_hdf5_file_from_filesystem_path(ofilename : str, # When select_file_keywords is an empty, i.e., [], do not apply any filter on the filenames. 
- filtered_filename_list = [] - if select_file_keywords: - for filename in filenames_list: - if any([keyword in filename for keyword in select_file_keywords]): - filtered_filename_list.append(filename) - else: - filtered_filename_list = filenames_list.copy() + #filtered_filename_list = [] + #if select_file_keywords: + # for filename in filenames_list: + # if any([keyword in filename for keyword in select_file_keywords]): + # filtered_filename_list.append(filename) + #else: + # filtered_filename_list = filenames_list.copy() - admissible_file_ext_list = list(config_file.ext_to_reader_dict.keys()) - - for filename in filtered_filename_list.copy(): - if not any([ext in filename for ext in admissible_file_ext_list]): - filtered_filename_list.remove(filename) + filtered_filename_list = file_paths_dict.get(dirpath,filenames_list.copy()) # Skip subdirectories that do not contain a keyword in the parameter 'select_dir_keywords' when it is nonempty if select_dir_keywords: #if (dirpath.count(os.sep) > offset) and not any([item in dirpath for item in select_dir_keywords]): + #tail, dirname = os.path.split(dirpath) + #if not any([item in dirname for item in select_dir_keywords]): if not any([item in dirpath for item in select_dir_keywords]): - continue + continue group_name = dirpath.replace(os.sep,'/') group_name = group_name.replace(root_dir.replace(os.sep,'/') + '/', '/') @@ -308,46 +361,62 @@ def create_hdf5_file_from_filesystem_path(ofilename : str, # TODO: for each "admissible" file in filenames, create an associated dataset in the corresponding group (subdirectory) - for filename in filtered_filename_list: + for filenumber, filename in enumerate(filtered_filename_list): # Get file extension (or file type) file_name, file_ext = os.path.splitext(filename) + #print(filename) + #try: if not 'h5' in filename: - file_dict = config_file.ext_to_reader_dict[file_ext](os.path.join(dirpath,filename)) + file_dict = config_file.select_file_readers(group_id)[file_ext](os.path.join(dirpath,filename)) if not file_dict: continue - # Create group and add their attributes - h5file[group_name].create_group(name=file_dict['name']) - for key in file_dict['attributes_dict'].keys(): - - # Represent string values as fixed length strings in the HDF5 file, which need - # to be decoded as string when we read them. It provides better control than variable strings, - # at the expense of flexibility. - # https://docs.h5py.org/en/stable/strings.html - value = file_dict['attributes_dict'][key] - if isinstance(value,str): - utf8_type = h5py.string_dtype('utf-8', len(value)) - value = np.array(value.encode('utf-8'),dtype=utf8_type) + try: + # Create group and add their attributes + h5file[group_name].create_group(name=file_dict['name']) + for key in file_dict['attributes_dict'].keys(): + + # Represent string values as fixed length strings in the HDF5 file, which need + # to be decoded as string when we read them. It provides better control than variable strings, + # at the expense of flexibility. 
+ # https://docs.h5py.org/en/stable/strings.html + value = file_dict['attributes_dict'][key] + if isinstance(value,str): + utf8_type = h5py.string_dtype('utf-8', len(value)) + value = np.array(value.encode('utf-8'),dtype=utf8_type) - h5file[group_name][file_dict['name']].attrs.create(name=key, - data=value) - - # Add datasets to just created group - for dataset in file_dict['datasets']: - h5file[group_name][file_dict['name']].create_dataset(name = dataset['name'], - data = dataset['data'], - #dtype = file_dict['dtype'], - shape = dataset['shape']) + h5file[group_name][file_dict['name']].attrs.create(name=key, + data=value) + + # Add datasets to just created group + for dataset in file_dict['datasets']: + h5file[group_name][file_dict['name']].create_dataset(name = dataset['name'], + data = dataset['data'], + #dtype = file_dict['dtype'], + shape = dataset['shape']) + + except Exception as inst: + # TODO: log when a file could not be stored as a dataset + print(inst) else: - config_file.ext_to_reader_dict[file_ext](source_file_path = os.path.join(dirpath,filename), + config_file.select_file_readers(group_id)[file_ext](source_file_path = os.path.join(dirpath,filename), dest_file_obj = h5file, dest_group_name = group_name +'/'+filename) - print(file_ext, ':)') + #print(filename,file_ext, ':)') + + + progressBar(filenumber,len(filtered_filename_list), 'Uploading files in ' + dirpath) + + + + output_yml_filename_path = hdf5_vis.take_yml_snapshot_of_hdf5_file(output_filename) + + return output_filename, output_yml_filename_path diff --git a/src/hdf5_vis.py b/src/hdf5_vis.py index ada825a..036f9bc 100644 --- a/src/hdf5_vis.py +++ b/src/hdf5_vis.py @@ -140,7 +140,7 @@ def print_metadata(name, obj, folder_depth, yaml_dict): #group_dict[obj.name]["name"] = obj.name #group_dict[obj.name]["attributes"] = attr_dict #group_dict[obj.name]["datasets"] = {} - print(name) + #print(name) yaml_dict[obj.name] = group_dict elif isinstance(obj, h5py.Dataset): diff --git a/src/metadata_review_lib.py b/src/metadata_review_lib.py index 4ba095d..cd85a37 100644 --- a/src/metadata_review_lib.py +++ b/src/metadata_review_lib.py @@ -35,16 +35,26 @@ def get_review_status(filename_path): workflow_steps.append(line) return workflow_steps[-1] -def checkout_review_branch(repo_obj,branch_name): +def checkout_review_branch(branch_name): # Create a new branch #branch_name = 'metadata-review-by-'+initials head_commit = repo_obj.head.peel()# Get the commit hash associated with HEAD - if not branch_name in repo_obj.branches: - branch = repo_obj.create_branch(branch_name, head_commit) - else: - branch = repo_obj.branches[branch_name] - repo_obj.checkout(branch) + checkout_branch_command = lambda branch_name : ['git','checkout', branch_name] + output = subprocess.run(checkout_branch_command(branch_name), capture_output=True,text=True,check=True) + + print(output.stdout) + + #if not branch_name in repo_obj.branches: + # branch = repo_obj.create_branch(branch_name, head_commit) + #else: + # branch = repo_obj.branches[branch_name] + #repo_obj.checkout(branch) + +current_branch_command = ['git','branch','--show-current'] +status_command = ['git','status'] +add_command = lambda add_list: ['git','add'] + add_list +commit_command = lambda message: ['git','commit','-m', message] def first_initialize_metadata_review(hdf5_file_path, reviewer_attrs): @@ -69,35 +79,55 @@ def first_initialize_metadata_review(hdf5_file_path, reviewer_attrs): raise ValueError("metadata review cannot be initialized. 
The associated .yaml file under review was not found. Run take_yml_snapshot_of_hdf5_file(filename_path) ") # Initialize metadata review workflow - print("Create branch metadata-review-by-"+initials+"\n") + # print("Create branch metadata-review-by-"+initials+"\n") + + #checkout_review_branch(branch_name) + # Check you are working at the right branch + current_branch_command = ['git','branch','--show-current'] + curr_branch = subprocess.run(current_branch_command,capture_output=True,text=True,check=True) + if not branch_name in curr_branch.stdout: + raise ValueError("Please checkout the branch: "+branch_name+" via Git Bash Terminal while in the project's directory") + # Check if review file already exists and then check if it is still untracked review_yaml_file_path = os.path.join("review/",filename+YAML_EXT) + if not os.path.exists(review_yaml_file_path): review_yaml_file_path = utils.make_file_copy(os.path.join(hdf5_file_path_tail,filename+YAML_EXT), 'review') - #else: - # raise Warning("the file " + os.path.join("review/",filename+YAML_EXT)+ " already exists. Delete this file to reinitialize the metadata review process.") - review_yaml_file_path_tail, ext = os.path.splitext(review_yaml_file_path) with open(os.path.join(review_yaml_file_path_tail+"-review_status"+".txt"),'w') as f: f.write('under review') - checkout_review_branch(repo_obj, branch_name) + # Stage review files and commit them to local repository + status = subprocess.run(status_command,capture_output=True,text=True,check=True) + untracked_files_for_review = [] + for line in status.stdout.splitlines(): + if 'review/' in line: + untracked_files_for_review.append(line) - status_dict = repo_obj.status() - for filepath, file_status in status_dict.items(): + result = subprocess.run(add_command(untracked_files_for_review),capture_output=True,check=True) + message = 'Initialized metadata review.' + commit_output = subprocess.run(commit_command(message),capture_output=True,check=True) + + print(commit_output.stdout) + + + + + #status_dict = repo_obj.status() + #for filepath, file_status in status_dict.items(): # Identify keys associated to review files and stage them - if 'review/'+filename in filepath: # Stage changes - repo_obj.index.add(filepath) + # if 'review/'+filename in filepath: # Stage changes + # repo_obj.index.add(filepath) - author = config_file.author #default_signature - committer = config_file.committer - message = "Initialized metadata review process." - tree = repo_obj.index.write_tree() - oid = repo_obj.create_commit('HEAD', author, committer, message, tree, [repo_obj.head.peel().oid]) + #author = config_file.author #default_signature + #committer = config_file.committer + #message = "Initialized metadata review process." + #tree = repo_obj.index.write_tree() + #oid = repo_obj.create_commit('HEAD', author, committer, message, tree, [repo_obj.head.peel().oid]) #print("Add and commit"+"\n") @@ -119,41 +149,33 @@ def second_submit_metadata_review(review_yaml_file_path, reviewer_attrs): initials = reviewer_attrs['initials'] branch_name = '-'.join([reviewer_attrs['type'],'review','by',initials]) # TODO: replace with subprocess + git - checkout_review_branch(repo_obj, branch_name) + #checkout_review_branch(repo_obj, branch_name) + # Check you are working at the right branch + curr_branch = subprocess.run(current_branch_command,capture_output=True,text=True,check=True) + if not branch_name in curr_branch.stdout: + raise ValueError('Please checkout ' + branch_name + ' via Git Bash before submitting metadata review files. 
') - #if any([status in get_review_status(filename_path) for status in ['under review','submitted']]): - # filename_path_tail, filename_path_head = os.path.split(filename_path) - # filename, ext = os.path.splitext(filename_path_head) - # # TODO: - - - ## - status_command = ['git','status'] - add_command = lambda add_list: ['git','add'] + add_list - commit_command = lambda message: ['git','commit','-m', message] - #push_command = lambda repository,refspec: ['git','push',repository,refspec] - + # Collect modified review files status = subprocess.run(status_command,capture_output=True,check=True) - - files_to_add_list = [] + modified_files = [] for line in status.stdout.splitlines(): # conver line from bytes to str tmp = line.decode("utf-8") if 'modified' in tmp and review_yaml_file_path in tmp: - files_to_add_list.append(tmp.split()[1]) - ## + modified_files.append(tmp.split()[1]) + # Stage modified files and commit them to local repository review_yaml_file_path_tail, review_yaml_file_path_head = os.path.split(review_yaml_file_path) filename, ext = os.path.splitext(review_yaml_file_path_head) - if files_to_add_list: + if modified_files: review_status_file_path = os.path.join("review/",filename+"-review_status"+TXT_EXT) with open(review_status_file_path,'a') as f: f.write('\nsubmitted') - files_to_add_list.append(review_status_file_path) + modified_files.append(review_status_file_path) - result = subprocess.run(add_command(files_to_add_list),capture_output=True,check=True) + result = subprocess.run(add_command(modified_files),capture_output=True,check=True) message = 'Submitted metadata review.' commit_output = subprocess.run(commit_command(message),capture_output=True,check=True) @@ -163,20 +185,6 @@ def second_submit_metadata_review(review_yaml_file_path, reviewer_attrs): print('Nothing to commit.') - #status_dict = repo_obj.status() - #for filepath, file_status in status_dict.items(): - # Identify keys associated to review files and stage them - # if ('review/'+filename in filepath) and (file_status == pygit.GIT_STATUS_WT_MODIFIED): - # Stage changes - # repo_obj.index.add(filepath) - - #author = config_file.author #default_signature - #committer = config_file.committer - #message = "Submitted metadata review." 
- #tree = repo_obj.index.write_tree() - #oid = repo_obj.create_commit('HEAD', author, committer, message, tree, [repo_obj.head.peel().oid]) - - def third_complete_metadata_review(reviewer_attrs): diff --git a/workflow_data_integration.ipynb b/workflow_data_integration.ipynb index fc82d59..45dc92c 100644 --- a/workflow_data_integration.ipynb +++ b/workflow_data_integration.ipynb @@ -1,41 +1,55 @@ { "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Import python packages and modules" + ] + }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "import sys\n", + "import os\n", + "# Set up project root directory\n", + "root_dir = os.path.abspath(os.curdir)\n", + "sys.path.append(root_dir)\n", + "\n", + "import src.hdf5_vis as hdf5_vis\n", + "import src.hdf5_lib as hdf5_lib\n", + "import input_files.config_file as config_file\n", + "\n", + "\n", + "output_dir = 'output_files/'\n", + "group_id = '5505'#'smog_chamber'#'5505'\n", + "user_initials = 'LL' #'NG' #'LL' # 'TBR'\n", + "\n", + "group_id = 'smog_chamber'#'5505'\n", + "user_initials = 'NG'#'LL' #'NG' #'LL' # 'TBR'\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Define input file path and keywords\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 9, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "2 26\n", - ".dat :)\n", - "2 26\n", - ".dat :)\n", - "2 26\n", - ".dat :)\n", - "2 26\n", - ".dat :)\n", - "Humidity_Sensors\n", - "Humidity_Sensors/2022\n", - "Humidity_Sensors/2022/01_Januar\n", - "Humidity_Sensors/2022/02_Februar\n", - "Humidity_Sensors/2022/03_März\n", - "Humidity_Sensors/2022/04_April\n", - "Humidity_Sensors/2022/05_Mai\n", - "Humidity_Sensors/2022/06_Juni\n", - "Humidity_Sensors/2022/07_Juli\n", - "Humidity_Sensors/2022/10_Oktober\n", - "Humidity_Sensors/2022/11_November\n", - "Humidity_Sensors/2022/12_Dezember\n", - "ICAD\n", - "ICAD/HONO\n", - "ICAD/HONO/2022\n", - "ICAD/HONO_prototype\n", - "ICAD/HONO_prototype/2022\n", - "ICAD/NO2\n", - "ICAD/NO2/2022\n" + "[==================================================--------------------------------------------------] 50.0% ...Uploading files in \\\\fs03\\Iron_Sulphate\\smps\\20220726\r" ] }, { @@ -49,237 +63,113 @@ "branchvalues": "remainder", "customdata": [ "
", - "/Humidity_Sensors", - "/Humidity_Sensors/2022", - "/Humidity_Sensors/2022/01_Januar", - "/Humidity_Sensors/2022/01_Januar/2022-01-31_09.37.56_PC14751_Humidity_Sensors.dat", - "/Humidity_Sensors/2022/01_Januar/2022-01-31_09.37.56_PC14751_Humidity_Sensors.dat/categorial_variable_names", - "/Humidity_Sensors/2022/01_Januar/2022-01-31_09.37.56_PC14751_Humidity_Sensors.dat/categorical_variables", - "/Humidity_Sensors/2022/01_Januar/2022-01-31_09.37.56_PC14751_Humidity_Sensors.dat/numerical_variable_names", - "/Humidity_Sensors/2022/01_Januar/2022-01-31_09.37.56_PC14751_Humidity_Sensors.dat/numerical_variables", - "/Humidity_Sensors/2022/02_Februar", - "/Humidity_Sensors/2022/02_Februar/2022-02-11_09.07.50_PC14751_Humidity_Sensors.dat", - "/Humidity_Sensors/2022/02_Februar/2022-02-11_09.07.50_PC14751_Humidity_Sensors.dat/categorial_variable_names", - "/Humidity_Sensors/2022/02_Februar/2022-02-11_09.07.50_PC14751_Humidity_Sensors.dat/categorical_variables", - "/Humidity_Sensors/2022/02_Februar/2022-02-11_09.07.50_PC14751_Humidity_Sensors.dat/numerical_variable_names", - "/Humidity_Sensors/2022/02_Februar/2022-02-11_09.07.50_PC14751_Humidity_Sensors.dat/numerical_variables", - "/Humidity_Sensors/2022/02_Februar/2022-02-11_16.46.26_PC10228_Humidity_Sensors.dat", - "/Humidity_Sensors/2022/02_Februar/2022-02-11_16.46.26_PC10228_Humidity_Sensors.dat/categorial_variable_names", - "/Humidity_Sensors/2022/02_Februar/2022-02-11_16.46.26_PC10228_Humidity_Sensors.dat/categorical_variables", - "/Humidity_Sensors/2022/02_Februar/2022-02-11_16.46.26_PC10228_Humidity_Sensors.dat/numerical_variable_names", - "/Humidity_Sensors/2022/02_Februar/2022-02-11_16.46.26_PC10228_Humidity_Sensors.dat/numerical_variables", - "/Humidity_Sensors/2022/03_März", - "/Humidity_Sensors/2022/03_März/2022-03-14_09.05.01_PC14751_Humidity_Sensors.dat", - "/Humidity_Sensors/2022/03_März/2022-03-14_09.05.01_PC14751_Humidity_Sensors.dat/categorial_variable_names", - "/Humidity_Sensors/2022/03_März/2022-03-14_09.05.01_PC14751_Humidity_Sensors.dat/categorical_variables", - "/Humidity_Sensors/2022/03_März/2022-03-14_09.05.01_PC14751_Humidity_Sensors.dat/numerical_variable_names", - "/Humidity_Sensors/2022/03_März/2022-03-14_09.05.01_PC14751_Humidity_Sensors.dat/numerical_variables", - "/Humidity_Sensors/2022/04_April", - "/Humidity_Sensors/2022/05_Mai", - "/Humidity_Sensors/2022/06_Juni", - "/Humidity_Sensors/2022/07_Juli", - "/Humidity_Sensors/2022/10_Oktober", - "/Humidity_Sensors/2022/11_November", - "/Humidity_Sensors/2022/12_Dezember", - "/ICAD", - "/ICAD/HONO", - "/ICAD/HONO/2022", - "/ICAD/HONO/2022/10_Oct", - "/ICAD/HONO/2022/11_Nov", - "/ICAD/HONO/2022/12_Dec", - "/ICAD/HONO_prototype", - "/ICAD/HONO_prototype/2022", - "/ICAD/HONO_prototype/2022/01_Jan", - "/ICAD/HONO_prototype/2022/02_Feb", - "/ICAD/NO2", - "/ICAD/NO2/2022", - "/ICAD/NO2/2022/01_Jan", - "/ICAD/NO2/2022/02_Feb", - "/ICAD/NO2/2022/03_Mar", - "/ICAD/NO2/2022/04_Apr", - "/ICAD/NO2/2022/05_May", - "/ICAD/NO2/2022/06_June", - "/ICAD/NO2/2022/07_July", - "/ICAD/NO2/2022/10_Oct", - "/ICAD/NO2/2022/11_Nov", - "/ICAD/NO2/2022/12_Dec" + "/gas", + "/gas/20220726_000004_MSC_gases.txt", + "/gas/20220726_000004_MSC_gases.txt/categorial_variable_names", + "/gas/20220726_000004_MSC_gases.txt/categorical_variables", + "/gas/20220726_000004_MSC_gases.txt/numerical_variable_names", + "/gas/20220726_000004_MSC_gases.txt/numerical_variables", + "/gas/20220726_101617_MSC_gases.txt", + "/gas/20220726_101617_MSC_gases.txt/categorial_variable_names", + 
"/gas/20220726_101617_MSC_gases.txt/categorical_variables", + "/gas/20220726_101617_MSC_gases.txt/numerical_variable_names", + "/gas/20220726_101617_MSC_gases.txt/numerical_variables", + "/smps", + "/smps/20220726", + "/smps/20220726/20220726_mass.TXT", + "/smps/20220726/20220726_mass.TXT/categorial_variable_names", + "/smps/20220726/20220726_mass.TXT/categorical_variables", + "/smps/20220726/20220726_mass.TXT/numerical_variable_names", + "/smps/20220726/20220726_mass.TXT/numerical_variables", + "/smps/20220726/20220726_num.TXT", + "/smps/20220726/20220726_num.TXT/categorial_variable_names", + "/smps/20220726/20220726_num.TXT/categorical_variables", + "/smps/20220726/20220726_num.TXT/numerical_variable_names", + "/smps/20220726/20220726_num.TXT/numerical_variables" ], "hovertemplate": "%{label}
Count: %{value}
Path: %{customdata}", "labels": [ "/", - "/Humidity_Sensors", - "/Humidity_Sensors/2022", - "/Humidity_Sensors/2022/01_Januar", - "/Humidity_Sensors/2022/01_Januar/2022-01-31_09.37.56_PC14751_Humidity_Sensors.dat", - "/Humidity_Sensors/2022/01_Januar/2022-01-31_09.37.56_PC14751_Humidity_Sensors.dat/categorial_variable_names", - "/Humidity_Sensors/2022/01_Januar/2022-01-31_09.37.56_PC14751_Humidity_Sensors.dat/categorical_variables", - "/Humidity_Sensors/2022/01_Januar/2022-01-31_09.37.56_PC14751_Humidity_Sensors.dat/numerical_variable_names", - "/Humidity_Sensors/2022/01_Januar/2022-01-31_09.37.56_PC14751_Humidity_Sensors.dat/numerical_variables", - "/Humidity_Sensors/2022/02_Februar", - "/Humidity_Sensors/2022/02_Februar/2022-02-11_09.07.50_PC14751_Humidity_Sensors.dat", - "/Humidity_Sensors/2022/02_Februar/2022-02-11_09.07.50_PC14751_Humidity_Sensors.dat/categorial_variable_names", - "/Humidity_Sensors/2022/02_Februar/2022-02-11_09.07.50_PC14751_Humidity_Sensors.dat/categorical_variables", - "/Humidity_Sensors/2022/02_Februar/2022-02-11_09.07.50_PC14751_Humidity_Sensors.dat/numerical_variable_names", - "/Humidity_Sensors/2022/02_Februar/2022-02-11_09.07.50_PC14751_Humidity_Sensors.dat/numerical_variables", - "/Humidity_Sensors/2022/02_Februar/2022-02-11_16.46.26_PC10228_Humidity_Sensors.dat", - "/Humidity_Sensors/2022/02_Februar/2022-02-11_16.46.26_PC10228_Humidity_Sensors.dat/categorial_variable_names", - "/Humidity_Sensors/2022/02_Februar/2022-02-11_16.46.26_PC10228_Humidity_Sensors.dat/categorical_variables", - "/Humidity_Sensors/2022/02_Februar/2022-02-11_16.46.26_PC10228_Humidity_Sensors.dat/numerical_variable_names", - "/Humidity_Sensors/2022/02_Februar/2022-02-11_16.46.26_PC10228_Humidity_Sensors.dat/numerical_variables", - "/Humidity_Sensors/2022/03_März", - "/Humidity_Sensors/2022/03_März/2022-03-14_09.05.01_PC14751_Humidity_Sensors.dat", - "/Humidity_Sensors/2022/03_März/2022-03-14_09.05.01_PC14751_Humidity_Sensors.dat/categorial_variable_names", - "/Humidity_Sensors/2022/03_März/2022-03-14_09.05.01_PC14751_Humidity_Sensors.dat/categorical_variables", - "/Humidity_Sensors/2022/03_März/2022-03-14_09.05.01_PC14751_Humidity_Sensors.dat/numerical_variable_names", - "/Humidity_Sensors/2022/03_März/2022-03-14_09.05.01_PC14751_Humidity_Sensors.dat/numerical_variables", - "/Humidity_Sensors/2022/04_April", - "/Humidity_Sensors/2022/05_Mai", - "/Humidity_Sensors/2022/06_Juni", - "/Humidity_Sensors/2022/07_Juli", - "/Humidity_Sensors/2022/10_Oktober", - "/Humidity_Sensors/2022/11_November", - "/Humidity_Sensors/2022/12_Dezember", - "/ICAD", - "/ICAD/HONO", - "/ICAD/HONO/2022", - "/ICAD/HONO/2022/10_Oct", - "/ICAD/HONO/2022/11_Nov", - "/ICAD/HONO/2022/12_Dec", - "/ICAD/HONO_prototype", - "/ICAD/HONO_prototype/2022", - "/ICAD/HONO_prototype/2022/01_Jan", - "/ICAD/HONO_prototype/2022/02_Feb", - "/ICAD/NO2", - "/ICAD/NO2/2022", - "/ICAD/NO2/2022/01_Jan", - "/ICAD/NO2/2022/02_Feb", - "/ICAD/NO2/2022/03_Mar", - "/ICAD/NO2/2022/04_Apr", - "/ICAD/NO2/2022/05_May", - "/ICAD/NO2/2022/06_June", - "/ICAD/NO2/2022/07_July", - "/ICAD/NO2/2022/10_Oct", - "/ICAD/NO2/2022/11_Nov", - "/ICAD/NO2/2022/12_Dec" + "/gas", + "/gas/20220726_000004_MSC_gases.txt", + "/gas/20220726_000004_MSC_gases.txt/categorial_variable_names", + "/gas/20220726_000004_MSC_gases.txt/categorical_variables", + "/gas/20220726_000004_MSC_gases.txt/numerical_variable_names", + "/gas/20220726_000004_MSC_gases.txt/numerical_variables", + "/gas/20220726_101617_MSC_gases.txt", + 
"/gas/20220726_101617_MSC_gases.txt/categorial_variable_names", + "/gas/20220726_101617_MSC_gases.txt/categorical_variables", + "/gas/20220726_101617_MSC_gases.txt/numerical_variable_names", + "/gas/20220726_101617_MSC_gases.txt/numerical_variables", + "/smps", + "/smps/20220726", + "/smps/20220726/20220726_mass.TXT", + "/smps/20220726/20220726_mass.TXT/categorial_variable_names", + "/smps/20220726/20220726_mass.TXT/categorical_variables", + "/smps/20220726/20220726_mass.TXT/numerical_variable_names", + "/smps/20220726/20220726_mass.TXT/numerical_variables", + "/smps/20220726/20220726_num.TXT", + "/smps/20220726/20220726_num.TXT/categorial_variable_names", + "/smps/20220726/20220726_num.TXT/categorical_variables", + "/smps/20220726/20220726_num.TXT/numerical_variable_names", + "/smps/20220726/20220726_num.TXT/numerical_variables" ], "name": "", "parents": [ "", "/", - "/Humidity_Sensors", - "/Humidity_Sensors/2022", - "/Humidity_Sensors/2022/01_Januar", - "/Humidity_Sensors/2022/01_Januar/2022-01-31_09.37.56_PC14751_Humidity_Sensors.dat", - "/Humidity_Sensors/2022/01_Januar/2022-01-31_09.37.56_PC14751_Humidity_Sensors.dat", - "/Humidity_Sensors/2022/01_Januar/2022-01-31_09.37.56_PC14751_Humidity_Sensors.dat", - "/Humidity_Sensors/2022/01_Januar/2022-01-31_09.37.56_PC14751_Humidity_Sensors.dat", - "/Humidity_Sensors/2022", - "/Humidity_Sensors/2022/02_Februar", - "/Humidity_Sensors/2022/02_Februar/2022-02-11_09.07.50_PC14751_Humidity_Sensors.dat", - "/Humidity_Sensors/2022/02_Februar/2022-02-11_09.07.50_PC14751_Humidity_Sensors.dat", - "/Humidity_Sensors/2022/02_Februar/2022-02-11_09.07.50_PC14751_Humidity_Sensors.dat", - "/Humidity_Sensors/2022/02_Februar/2022-02-11_09.07.50_PC14751_Humidity_Sensors.dat", - "/Humidity_Sensors/2022/02_Februar", - "/Humidity_Sensors/2022/02_Februar/2022-02-11_16.46.26_PC10228_Humidity_Sensors.dat", - "/Humidity_Sensors/2022/02_Februar/2022-02-11_16.46.26_PC10228_Humidity_Sensors.dat", - "/Humidity_Sensors/2022/02_Februar/2022-02-11_16.46.26_PC10228_Humidity_Sensors.dat", - "/Humidity_Sensors/2022/02_Februar/2022-02-11_16.46.26_PC10228_Humidity_Sensors.dat", - "/Humidity_Sensors/2022", - "/Humidity_Sensors/2022/03_März", - "/Humidity_Sensors/2022/03_März/2022-03-14_09.05.01_PC14751_Humidity_Sensors.dat", - "/Humidity_Sensors/2022/03_März/2022-03-14_09.05.01_PC14751_Humidity_Sensors.dat", - "/Humidity_Sensors/2022/03_März/2022-03-14_09.05.01_PC14751_Humidity_Sensors.dat", - "/Humidity_Sensors/2022/03_März/2022-03-14_09.05.01_PC14751_Humidity_Sensors.dat", - "/Humidity_Sensors/2022", - "/Humidity_Sensors/2022", - "/Humidity_Sensors/2022", - "/Humidity_Sensors/2022", - "/Humidity_Sensors/2022", - "/Humidity_Sensors/2022", - "/Humidity_Sensors/2022", + "/gas", + "/gas/20220726_000004_MSC_gases.txt", + "/gas/20220726_000004_MSC_gases.txt", + "/gas/20220726_000004_MSC_gases.txt", + "/gas/20220726_000004_MSC_gases.txt", + "/gas", + "/gas/20220726_101617_MSC_gases.txt", + "/gas/20220726_101617_MSC_gases.txt", + "/gas/20220726_101617_MSC_gases.txt", + "/gas/20220726_101617_MSC_gases.txt", "/", - "/ICAD", - "/ICAD/HONO", - "/ICAD/HONO/2022", - "/ICAD/HONO/2022", - "/ICAD/HONO/2022", - "/ICAD", - "/ICAD/HONO_prototype", - "/ICAD/HONO_prototype/2022", - "/ICAD/HONO_prototype/2022", - "/ICAD", - "/ICAD/NO2", - "/ICAD/NO2/2022", - "/ICAD/NO2/2022", - "/ICAD/NO2/2022", - "/ICAD/NO2/2022", - "/ICAD/NO2/2022", - "/ICAD/NO2/2022", - "/ICAD/NO2/2022", - "/ICAD/NO2/2022", - "/ICAD/NO2/2022", - "/ICAD/NO2/2022" + "/smps", + "/smps/20220726", + "/smps/20220726/20220726_mass.TXT", 
+ "/smps/20220726/20220726_mass.TXT", + "/smps/20220726/20220726_mass.TXT", + "/smps/20220726/20220726_mass.TXT", + "/smps/20220726", + "/smps/20220726/20220726_num.TXT", + "/smps/20220726/20220726_num.TXT", + "/smps/20220726/20220726_num.TXT", + "/smps/20220726/20220726_num.TXT" ], "root": { "color": "lightgrey" }, "type": "treemap", "values": [ - 1, - 1, - 0, - 5, - 1, - 1, - 1, - 1, - 1, - 14, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 6, - 1, + 2, + 2, + 4, 1, 1, 1, 1, 4, - 9, - 11, - 3, - 8, - 17, + 1, + 1, + 1, + 1, + 1, 2, - 1, - 1, - 0, - 7, - 8, - 2, - 1, - 1, - 3, - 6, - 1, - 0, - 3, - 6, - 5, - 3, 4, - 6, - 2, - 5, - 8, - 2 + 1, + 1, + 1, + 1, + 4, + 1, + 1, + 1, + 1 ] } ], @@ -1116,18 +1006,37 @@ } ], "source": [ - "import sys\n", - "import os\n", - "root_dir = os.path.abspath(os.curdir)\n", - "sys.path.append(root_dir)\n", + "#input_file_dir = '//fs101/5505/People/Juan/TypicalBeamTime'\n", + "#select_file_keywords=[]\n", + "#select_dir_keywords = ['NEXAFS', 'Notes', 'Photos', 'Pressure', 'RGA', 'SES']\n", "\n", - "import src.hdf5_vis as hdf5_vis\n", - "import src.hdf5_lib as hdf5_lib\n", "\n", - "output_filename_path, output_yml_filename_path = hdf5_lib.main()\n", + "#input_file_dir = '//fs101/5505/Data' \n", + "#select_dir_keywords = ['Lopap', 'Humidity_Sensors/2022', 'ICAD/HONO/2022', 'ICAD/NO2/2022', 'T200_NOX', 'T360U_CO2']\n", + "#select_file_keywords = ['2022-03-25','2022_03_25','20220325']\n", + "\n", + "\n", + "input_file_dir = '//fs03/Iron_Sulphate'\n", + "select_dir_keywords = ['gas','smps/20220726']#,'htof/2022.07.26','ptr/2022.07.26','ams/2022.07.26']\n", + "#select_dir_keywords = ['htof','ams', 'ptr', 'gas','smps'] \n", + "\n", + "select_file_keywords = ['20220726','2022.07.26']\n", + "\n", + "config_param = {'group_id' : group_id, 'user_initials' : user_initials, 'output_dir': output_dir}\n", + "\n", + "\n", + "output_filename_path, output_yml_filename_path = hdf5_lib.create_hdf5_file_from_filesystem_path(config_param,\n", + " input_file_dir,\n", + " select_dir_keywords,\n", + " select_file_keywords)\n", "\n", "hdf5_vis.display_group_hierarchy_on_a_treemap(output_filename_path)" ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] } ], "metadata": { diff --git a/workflow_data_owner_review.ipynb b/workflow_data_owner_review.ipynb new file mode 100644 index 0000000..a8fc237 --- /dev/null +++ b/workflow_data_owner_review.ipynb @@ -0,0 +1,160 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Import libraries and modules\n", + "\n", + "* Excecute (or Run) Cell" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import sys\n", + "import os\n", + "root_dir = os.path.abspath(os.curdir)\n", + "sys.path.append(root_dir)\n", + "\n", + "import src.metadata_review_lib as metadata_review_lib" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Step 1: initialize metadata review.\n", + "\n", + "* Specify hdf5 file whose metadata is to be reviewed by editing the string variable `hdf5_file_path`.\n", + "* Edit reviewer attributes, i.e., the dict variable `reviewer_attrs` with your own initials and role.\n", + "* Excecute Cell." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Create branch metadata-review-by-NG\n", + "\n" + ] + }, + { + "ename": "ValueError", + "evalue": "('Please checkout the branch: ', 'data-owner-review-by-NG', \" via Git Bash Terminal while in the project's directory\")", + "output_type": "error", + "traceback": [ + "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[1;31mValueError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[1;32mIn[2], line 12\u001b[0m\n\u001b[0;32m 7\u001b[0m reviewer_attrs \u001b[38;5;241m=\u001b[39m {\u001b[38;5;124m'\u001b[39m\u001b[38;5;124minitials\u001b[39m\u001b[38;5;124m'\u001b[39m: \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mNG\u001b[39m\u001b[38;5;124m'\u001b[39m,\n\u001b[0;32m 8\u001b[0m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mtype\u001b[39m\u001b[38;5;124m'\u001b[39m: \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mdata-owner\u001b[39m\u001b[38;5;124m'\u001b[39m}\n\u001b[0;32m 10\u001b[0m \u001b[38;5;66;03m#output_filename_path, output_yml_filename_path = hdf5_lib.main()\u001b[39;00m\n\u001b[1;32m---> 12\u001b[0m review_yaml_file_path \u001b[38;5;241m=\u001b[39m \u001b[43mmetadata_review_lib\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfirst_initialize_metadata_review\u001b[49m\u001b[43m(\u001b[49m\u001b[43mhdf5_file_path\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mreviewer_attrs\u001b[49m\u001b[43m)\u001b[49m \n\u001b[0;32m 14\u001b[0m \u001b[38;5;28mprint\u001b[39m(review_yaml_file_path) \n", + "File \u001b[1;32mc:\\Users\\florez_j\\Documents\\GitLab\\functionspython\\src\\metadata_review_lib.py:89\u001b[0m, in \u001b[0;36mfirst_initialize_metadata_review\u001b[1;34m(hdf5_file_path, reviewer_attrs)\u001b[0m\n\u001b[0;32m 87\u001b[0m curr_branch \u001b[38;5;241m=\u001b[39m subprocess\u001b[38;5;241m.\u001b[39mrun(current_branch_command,capture_output\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m,text\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m,check\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m)\n\u001b[0;32m 88\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m branch_name \u001b[38;5;129;01min\u001b[39;00m curr_branch\u001b[38;5;241m.\u001b[39mstdout:\n\u001b[1;32m---> 89\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mPlease checkout the branch: \u001b[39m\u001b[38;5;124m\"\u001b[39m,branch_name,\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m via Git Bash Terminal while in the project\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124ms directory\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m 91\u001b[0m \u001b[38;5;66;03m# Check if review file already exists and then check if it is still untracked\u001b[39;00m\n\u001b[0;32m 92\u001b[0m review_yaml_file_path \u001b[38;5;241m=\u001b[39m os\u001b[38;5;241m.\u001b[39mpath\u001b[38;5;241m.\u001b[39mjoin(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mreview/\u001b[39m\u001b[38;5;124m\"\u001b[39m,filename\u001b[38;5;241m+\u001b[39mYAML_EXT)\n", + "\u001b[1;31mValueError\u001b[0m: ('Please checkout the branch: ', 'data-owner-review-by-NG', \" via Git Bash Terminal while in the project's directory\")" + ] + } + ], + "source": [ + "\n", + "#hdf5_file_path = \"output_files/unified_file_smog_chamber_2024-03-25_UTC-OFST_+0100_NG.h5\"\n", + "#yml_file_path = 
\"output_files/unified_file_smog_chamber_2024-03-25_UTC-OFST_+0100_NG.yaml\"\n", + "\n", + "hdf5_file_path = \"output_files/unified_file_smog_chamber_2024-04-03_UTC-OFST_+0200_NG.h5\"\n", + "yml_file_path = \"output_files/unified_file_smog_chamber_2024-04-03_UTC-OFST_+0200_NG.yaml\"\n", + "\n", + "reviewer_attrs = {'initials': 'NG',\n", + " 'type': 'data-owner'}\n", + "\n", + "#output_filename_path, output_yml_filename_path = hdf5_lib.main()\n", + "\n", + "review_yaml_file_path = metadata_review_lib.first_initialize_metadata_review(hdf5_file_path, reviewer_attrs) \n", + "\n", + "print(review_yaml_file_path) " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Step 2: Submit metadata review. \n", + "\n", + "* Edit yaml file in review folder and save changes\n", + "* Excecute Cell." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "metadata_review_lib.second_submit_metadata_review(review_yaml_file_path,reviewer_attrs)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Step 3: Update hdf5 file metadata w/ submitted review yaml file.\n", + "\n", + "* Make sure previous step was carried out properly.\n", + "* Excecute Cell." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "metadata_review_lib.third_update_hdf5_file_with_review(hdf5_file_path, review_yaml_file_path, reviewer_attrs)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Step 4: Complete data-owner review. Update remote repository\n", + "\n", + "* Excecute Cell." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "metadata_review_lib.fourth_complete_metadata_review(reviewer_attrs)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "test_atmos_chem_env", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.6" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/workflow_metadata_reviewer.ipynb b/workflow_metadata_reviewer.ipynb new file mode 100644 index 0000000..709d82c --- /dev/null +++ b/workflow_metadata_reviewer.ipynb @@ -0,0 +1,18 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}