Resolved incoming changes from main branch
.gitignore (vendored): 3 changes
@@ -1,4 +1,5 @@
*.pyc
__pycache__/
*.h5
tmp_files/
tmp_files/
*.ipynb
@@ -14,85 +14,32 @@ import pygit2 as pygit
|
||||
author = pygit.Signature('Florez Ospina Juan Felipe', 'juan.florez-ospina@psi.ch')
|
||||
committer = pygit.Signature('Florez Ospina Juan Felipe', 'juan.florez-ospina@psi.ch')
|
||||
|
||||
group_id = 'smog_chamber'
|
||||
|
||||
#group_id = '5505'
|
||||
|
||||
output_filename_tempate = lambda group_id, timestamp,user_initials : '_'.join(['unified_file',group_id,timestamp,user_initials])+'.h5'
|
||||
|
||||
def created_at():
|
||||
|
||||
now = datetime.now()
|
||||
# Populate the now object with time zone information obtained from the local system
|
||||
now_tz_aware = now.astimezone()
|
||||
tz = now_tz_aware.strftime('%z')
|
||||
#created_at = now_tz_aware.strftime('%Y-%m-%d_%H-%M-%S')+'_utcoffset_' + tz
|
||||
created_at = now_tz_aware.strftime('%Y-%m-%d')+'_UTC-OFST_' + tz
|
||||
# Make created at timestamp with tz information
|
||||
#created_at = now.isoformat()
|
||||
|
||||
usecase = 1
|
||||
|
||||
if usecase == 1:
|
||||
|
||||
group_id == 'smog_chamber'
|
||||
user_initials = 'NG'
|
||||
#from smog_chamber_file_reader import read_txt_files_as_dict
|
||||
#from g5505_file_reader import copy_file_in_group
|
||||
#select_dir_keywords = ['htof','ams', 'ptr', 'gas','smps']
|
||||
inputfile_dir = '\\\\fs03\\Iron_Sulphate'
|
||||
inputfile_dir = '//fs03/Iron_Sulphate'
|
||||
#select_dir_keywords = ['gas','smps\\20220726','htof\\2022.07.26','ptr\\2022.07.26','ams\\2022.07.26']
|
||||
select_dir_keywords = ['gas','smps/20220726']#,'htof/2022.07.26','ptr/2022.07.26','ams/2022.07.26']
|
||||
select_file_keywords = ['20220726','2022.07.26']
|
||||
|
||||
outputfile_dir = 'output_files'
|
||||
|
||||
output_filename = output_filename_tempate(group_id,created_at,user_initials) #'test_smog_chamber_v14.h5'
|
||||
#output_filename = 'unified_file_smog_chamber_2024-03-19_UTC-OFST_+0100_NG.h5'
|
||||
now = datetime.now()
|
||||
# Populate the now object with time zone information obtained from the local system
|
||||
now_tz_aware = now.astimezone()
|
||||
tz = now_tz_aware.strftime('%z')
|
||||
#created_at = now_tz_aware.strftime('%Y-%m-%d_%H-%M-%S')+'_utcoffset_' + tz
|
||||
created_at = now_tz_aware.strftime('%Y-%m-%d')+'_UTC-OFST_' + tz
|
||||
# Make created at timestamp with tz information
|
||||
#created_at = now.isoformat()
|
||||
return created_at
|
||||
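# --- Illustrative sketch (not part of the committed config): the timestamp format that
# created_at() above is meant to return, and how output_filename_tempate() embeds it.
# Only the standard-library datetime module is assumed.
def _example_timestamp():
    from datetime import datetime
    now_tz_aware = datetime.now().astimezone()   # attach the local time zone to "now"
    return now_tz_aware.strftime('%Y-%m-%d') + '_UTC-OFST_' + now_tz_aware.strftime('%z')

# _example_timestamp()                                          -> e.g. '2024-03-19_UTC-OFST_+0100'
# output_filename_tempate('smog_chamber', _example_timestamp(), 'NG')
#                                    -> 'unified_file_smog_chamber_2024-03-19_UTC-OFST_+0100_NG.h5'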
|
||||
|
||||
ext_to_reader_dict = {'.txt': scf_reader.read_txt_files_as_dict,
|
||||
def select_file_readers(group_id):
|
||||
if group_id == '5505':
|
||||
ext_to_reader_dict = {'.ibw': g5505f_reader.read_xps_ibw_file_as_dict,
|
||||
'.txt': g5505f_reader.read_txt_files_as_dict,
|
||||
'.dat': g5505f_reader.read_txt_files_as_dict,
|
||||
'.h5': g5505f_reader.copy_file_in_group}
|
||||
elif group_id == 'smog_chamber':
|
||||
ext_to_reader_dict = {'.txt': scf_reader.read_txt_files_as_dict,
|
||||
'.TXT': scf_reader.read_txt_files_as_dict,
|
||||
'.h5': g5505f_reader.copy_file_in_group}
|
||||
|
||||
elif usecase == 2 :
|
||||
|
||||
group_id == '5505'
|
||||
user_initials = 'TBR'
|
||||
outputfile_dir = 'output_files'
|
||||
#output_filename = 'test_sls_data_v8.h5'
|
||||
inputfile_dir = '//fs101/5505/People/Juan/TypicalBeamTime'
|
||||
select_file_keywords=[]
|
||||
select_dir_keywords = ['NEXAFS', 'Notes', 'Photos', 'Pressure', 'RGA', 'SES']
|
||||
|
||||
output_filename = output_filename_tempate(group_id,created_at,user_initials)
|
||||
#output_filename = 'unified_file_5505_2024-03-19_UTC-OFST_+0100_TBR.h5'
|
||||
|
||||
ext_to_reader_dict = {'.ibw': g5505f_reader.read_xps_ibw_file_as_dict,
|
||||
'.txt': g5505f_reader.read_txt_files_as_dict,
|
||||
'.dat': g5505f_reader.read_txt_files_as_dict,
|
||||
'.h5': g5505f_reader.copy_file_in_group}
|
||||
elif usecase == 3:
|
||||
user_initials = 'LL'
|
||||
outputfile_dir = 'output_files'
|
||||
output_filename = output_filename_tempate(group_id,created_at,user_initials)
|
||||
|
||||
inputfile_dir = '//fs101/5505/Data'
|
||||
|
||||
#select_dir_keywords = ['Lopap', 'Humidity_Sensors', 'ICAD/HONO', 'ICAD/NO2', 'T200_NOX', 'T360U_CO2']
|
||||
# TODO: make sure in the code composite keywords are broken down into single keywords
|
||||
|
||||
##select_dir_keywords = ['Humidity_Sensors','ICAD/HONO','ICAD/NO2']
|
||||
select_dir_keywords = ['Humidity_Sensors/2022','ICAD/HONO/2022','ICAD/NO2/2022', '2022/01_Jan', '2022/02_Feb', '2022/03_März']
|
||||
|
||||
dates = pd.read_excel(os.path.abspath(os.path.join('input_files','date_experiments_for Juan.xlsx')))
|
||||
|
||||
select_file_keywords=[item.strftime('%Y-%m-%d') for item in dates.loc[0:2,'experiment_date']]
|
||||
select_file_keywords= select_file_keywords + [item.strftime('%Y%m%d') for item in dates.loc[0:2,'experiment_date']]
|
||||
|
||||
ext_to_reader_dict = {'.ibw': g5505f_reader.read_xps_ibw_file_as_dict,
|
||||
'.txt': g5505f_reader.read_txt_files_as_dict,
|
||||
'.dat': g5505f_reader.read_txt_files_as_dict,
|
||||
'.h5': g5505f_reader.copy_file_in_group}
|
||||
|
||||
return ext_to_reader_dict
|
||||
|
||||
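# --- Illustrative sketch (not part of the committed config): how the extension-to-reader map
# returned by select_file_readers() is consumed. The '.h5' entry (copy_file_in_group) takes
# different arguments and is dispatched separately in hdf5_lib; the example path below is one
# of the smog-chamber files that appears later in this commit.
def _example_dispatch(group_id, file_path):
    import os
    ext_to_reader = select_file_readers(group_id)      # e.g. {'.txt': ..., '.TXT': ..., '.h5': ...}
    _, file_ext = os.path.splitext(file_path)
    reader = ext_to_reader.get(file_ext)
    return reader(file_path) if reader else {}         # text readers return a dict ({} if unsupported)

# _example_dispatch('smog_chamber', 'gas/20220726_000004_MSC_gases.txt')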
|
@@ -93,10 +93,12 @@ def read_txt_files_as_dict(filename : str ):
|
||||
file_encoding = 'latin-1'
|
||||
elif 'ICAD' in filename and 'HONO' in filename:
|
||||
table_header = 'Start Date/Time (UTC) Duration (s) NO2 (ppb) NO2 Uncertainty (ppb) H2O (ppb) H2O Uncertainty (ppb) CHOCHO (ppb) CHOCHO Uncertainty (ppb) File Number Light Intensity #ICEDOAS iter. Cell Pressure Ambient Pressure Cell Temp Spec Temp Lat Lon Height Speed GPSQuality 0-Air Ref. Time 0-Air Ref. Duration 0-Air Ref. File Number 0-Air Ref. Intensity 0-Air Ref. Rel Intensity 0-Air Ref. Intensity valid MeasMode SampleSource'
|
||||
separator = '\t'
|
||||
separator = '\t'
|
||||
file_encoding = 'latin-1'
|
||||
elif 'ICAD' in filename and 'NO2' in filename:
|
||||
table_header = 'Start Date/Time (UTC) Duration (s) NO2 (ppb) NO2 Uncertainty (ppb) H2O (ppb) H2O Uncertainty (ppb) CHOCHO (ppb) CHOCHO Uncertainty (ppb) File Number Light Intensity #ICEDOAS iter. Cell Pressure Ambient Pressure Cell Temp Spec Temp Lat Lon Height Speed GPSQuality 0-Air Ref. Time 0-Air Ref. Duration 0-Air Ref. File Number 0-Air Ref. Intensity 0-Air Ref. Rel Intensity 0-Air Ref. Intensity valid MeasMode SampleSource'
|
||||
separator = '\t'
|
||||
file_encoding = 'latin-1'
|
||||
else:
|
||||
return {}
|
||||
#raise ValueError('instrument_folder must be set to either "RGA" or "Pressure"')
|
||||
@@ -107,33 +109,32 @@ def read_txt_files_as_dict(filename : str ):
|
||||
# Work with copy of the file for safety
|
||||
tmp_filename = utils.make_file_copy(source_file_path=filename)
|
||||
|
||||
with open(tmp_filename,'r',encoding=file_encoding,errors='ignore') as f:
|
||||
#file_encoding = f.encoding
|
||||
#table_preamble = ""
|
||||
#with open(tmp_filename,'rb',encoding=file_encoding,errors='ignore') as f:
|
||||
with open(tmp_filename,'rb') as f:
|
||||
table_preamble = []
|
||||
for line_number, line in enumerate(f):
|
||||
|
||||
if table_header in line:
|
||||
list_of_substrings = line.split(separator)
|
||||
if table_header in line.decode(file_encoding):
|
||||
list_of_substrings = line.decode(file_encoding).split(separator)
|
||||
data_start = True
|
||||
column_names = []
|
||||
for i, name in enumerate(list_of_substrings):
|
||||
column_names.append(str(i)+'_'+name)
|
||||
|
||||
print(line_number, len(column_names ))
|
||||
#print(line_number, len(column_names ),'\n')
|
||||
break
|
||||
# Subdivide line into words, and join them by single space.
|
||||
# I assume this produces a cleaner line that contains no stray separator characters (\t, \r) or extra spaces.
|
||||
list_of_substrings = line.split()
|
||||
list_of_substrings = line.decode(file_encoding).split()
|
||||
# TODO: ideally we should use a multiline string, but the yaml parser does not recognize \n as a special character
|
||||
#line = ' '.join(list_of_substrings+['\n'])
|
||||
line = ' '.join(list_of_substrings)
|
||||
table_preamble.append(line)# += new_line
|
||||
#line = ' '.join(list_of_substrings)
|
||||
table_preamble.append(' '.join(list_of_substrings))# += new_line
|
||||
|
||||
header_dict["table_preamble"] = table_preamble
|
||||
|
||||
|
||||
# TODO: it does not work with separater as none :(. fix for RGA
|
||||
# TODO: it does not work with separator as none :(. fix for RGA
|
||||
try:
|
||||
df = pd.read_csv(tmp_filename,
|
||||
delimiter = separator,
|
||||
|
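# --- Illustrative sketch (not part of the committed reader): the header-detection step above in
# isolation. Each raw line is decoded with the instrument file's encoding (e.g. 'latin-1') before
# being compared with table_header and normalized to single spaces; all names here are local.
def _example_scan_for_header(tmp_filename, table_header, separator, file_encoding='latin-1'):
    table_preamble = []
    with open(tmp_filename, 'rb') as f:
        for line_number, raw_line in enumerate(f):
            text = raw_line.decode(file_encoding)
            if table_header in text:
                column_names = [str(i) + '_' + name for i, name in enumerate(text.split(separator))]
                return line_number, column_names, table_preamble
            table_preamble.append(' '.join(text.split()))   # drop \t, \r and repeated spaces
    return None, [], table_preamble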
src/hdf5_lib.py: 193 changes
@@ -17,6 +17,14 @@ import h5py
|
||||
import yaml
|
||||
|
||||
|
||||
def progressBar(count_value, total, suffix=''):
|
||||
bar_length = 100
|
||||
filled_up_Length = int(round(bar_length* count_value / float(total)))
|
||||
percentage = round(100.0 * count_value/float(total),1)
|
||||
bar = '=' * filled_up_Length + '-' * (bar_length - filled_up_Length)
|
||||
sys.stdout.write('[%s] %s%s ...%s\r' %(bar, percentage, '%', suffix))
|
||||
sys.stdout.flush()
|
||||
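# --- Illustrative usage sketch (not part of the committed code) for progressBar() above; the
# suffix text mirrors the notebook output later in this commit and the timing is arbitrary.
def _example_progress():
    import time
    n_files = 10
    for k in range(1, n_files + 1):
        progressBar(k, n_files, suffix='Uploading files in /smps/20220726')
        time.sleep(0.1)                 # stand-in for per-file work
    sys.stdout.write('\n')              # move past the \r-terminated progress line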
|
||||
def read_mtable_as_dataframe(filename):
|
||||
|
||||
""" Reconstruct a Matlab Table encoded in a .h5 file as a Pandas DataFrame. The input .h5 file
|
||||
@@ -150,10 +158,11 @@ def get_parent_child_relationships(file: h5py.File):
|
||||
parent = ['']
|
||||
#values = [file.attrs['count']]
|
||||
# TODO: maybe we should make this more general and not dependent on file_list attribute?
|
||||
if 'file_list' in file.attrs.keys():
|
||||
values = [len(file.attrs['file_list'])]
|
||||
else:
|
||||
values = [1]
|
||||
#if 'file_list' in file.attrs.keys():
|
||||
# values = [len(file.attrs['file_list'])]
|
||||
#else:
|
||||
# values = [1]
|
||||
values = [len(file.keys())]
|
||||
|
||||
def node_visitor(name,obj):
|
||||
#if isinstance(obj,h5py.Group):
|
||||
@@ -161,10 +170,12 @@ def get_parent_child_relationships(file: h5py.File):
|
||||
parent.append(obj.parent.name)
|
||||
#nodes.append(os.path.split(obj.name)[1])
|
||||
#parent.append(os.path.split(obj.parent.name)[1])
|
||||
if isinstance(obj,h5py.Dataset) or not 'file_list' in obj.attrs.keys():
|
||||
|
||||
if isinstance(obj,h5py.Dataset):# or not 'file_list' in obj.attrs.keys():
|
||||
values.append(1)
|
||||
else:
|
||||
values.append(len(obj.attrs['file_list']))
|
||||
values.append(len(obj.keys()))
|
||||
#values.append(len(obj.attrs['file_list']))
|
||||
file.visititems(node_visitor)
|
||||
|
||||
return nodes, parent, values
|
||||
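# --- Illustrative sketch (not part of the committed code): the (nodes, parent, values) triple
# returned above has exactly the shape a plotly treemap expects, which is how
# hdf5_vis.display_group_hierarchy_on_a_treemap() uses it elsewhere in this commit.
def _example_treemap(h5_file_path):
    import plotly.graph_objects as go    # used only by this sketch
    with h5py.File(h5_file_path, 'r') as file:
        nodes, parent, values = get_parent_child_relationships(file)
    fig = go.Figure(go.Treemap(labels=nodes, parents=parent, values=values,
                               branchvalues='remainder'))
    fig.show()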
@@ -204,15 +215,20 @@ def annotate_root_dir(filename,annotation_dict: dict):
|
||||
|
||||
import shutil
|
||||
|
||||
def create_hdf5_file_from_filesystem_path(ofilename : str,
|
||||
def create_hdf5_file_from_filesystem_path(config_param : dict ,
|
||||
input_file_system_path : str,
|
||||
select_dir_keywords = [],
|
||||
select_file_keywords =[],
|
||||
top_sub_dir_mask : bool = True):
|
||||
#def create_hdf5_file_from_filesystem_path(output_filename : str,
|
||||
# input_file_system_path : str,
|
||||
# select_dir_keywords = [],
|
||||
# select_file_keywords =[],
|
||||
# top_sub_dir_mask : bool = True):
|
||||
|
||||
"""
|
||||
Creates an .h5 file with name ofilename that preserves the directory tree (or folder structure) of given a filesystem path and
|
||||
a few file and directory keywords. The keywords enable filtering of directories and files that do not contain the specified keywords.
|
||||
Creates an .h5 file with name "output_filename" that preserves the directory tree (or folder structure) of given a filesystem path.
|
||||
When the file and directory keywords are non-empty, they filter out directories and files that do not contain the specified keywords.
|
||||
|
||||
In the .h5 file, only files with admissible file formats are stored, in the form of datasets and attributes.
|
||||
|
||||
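# --- Illustrative call sketch (not part of the committed code), mirroring the notebook cell
# included later in this commit; paths and keywords are the smog-chamber example values.
#
#   config_param = {'group_id': 'smog_chamber', 'user_initials': 'NG', 'output_dir': 'output_files/'}
#   output_filename_path, output_yml_filename_path = create_hdf5_file_from_filesystem_path(
#       config_param,
#       '//fs03/Iron_Sulphate',                          # forward slashes are required here
#       select_dir_keywords=['gas', 'smps/20220726'],
#       select_file_keywords=['20220726', '2022.07.26'])
#   hdf5_vis.display_group_hierarchy_on_a_treemap(output_filename_path)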
@@ -237,35 +253,74 @@ def create_hdf5_file_from_filesystem_path(ofilename : str,
|
||||
|
||||
# Ensure OS compliant paths and keywords
|
||||
|
||||
# TODO: validate config_param dict, make sure output_filename is a valid file_path
|
||||
group_id = config_param['group_id']
|
||||
user_initials = config_param['user_initials']
|
||||
created_at = config_file.created_at()
|
||||
output_dir = config_param['output_dir']
|
||||
output_filename = output_dir + config_file.output_filename_tempate(group_id,created_at,user_initials)
|
||||
|
||||
admissible_file_ext_list = list(config_file.select_file_readers(group_id).keys())
|
||||
|
||||
if '/' in input_file_system_path:
|
||||
input_file_system_path = input_file_system_path.replace('/',os.sep)
|
||||
else:
|
||||
raise ValueError('input_file_system_path needs to be specified using forward slashes "/".' )
|
||||
|
||||
for i, keyword in enumerate(select_dir_keywords):
|
||||
select_dir_keywords[i] = keyword.replace('/',os.sep)
|
||||
select_dir_keywords[i] = keyword.replace('/',os.sep)
|
||||
|
||||
# Visit each subdirectory from top to bottom, from the root directory defined by input_file_system_path to the lower
|
||||
# level directories.
|
||||
|
||||
with h5py.File(ofilename, 'w') as h5file:
|
||||
# Constrain walkable paths on the specified directory tree by allowing walks that start from root
|
||||
# through subdirectories specified by dir_keywords. This improves efficiency, especially in deep
|
||||
# directory trees with many leaves.
|
||||
paths = []
|
||||
if top_sub_dir_mask:
|
||||
for item in os.listdir(input_file_system_path):
|
||||
if any([item in keyword for keyword in select_dir_keywords]):
|
||||
paths.append(os.path.join(input_file_system_path,item))
|
||||
else:
|
||||
paths.append(input_file_system_path)
|
||||
|
||||
# Visit each subdirectory from top to bottom, from the root directory defined by input_file_system_path to the lower
|
||||
# level directories.
|
||||
|
||||
# Constrain walkable paths on the specified directory tree by allowing walks that start from root
|
||||
# through subdirectories specified by dir_keywords. This improves efficiency, especially in deep
|
||||
# directory trees with many leaves.
|
||||
paths = []
|
||||
if top_sub_dir_mask:
|
||||
for item in os.listdir(input_file_system_path):
|
||||
if any([item in keyword for keyword in select_dir_keywords]):
|
||||
paths.append(os.path.join(input_file_system_path,item))
|
||||
else:
|
||||
paths.append(input_file_system_path)
|
||||
with h5py.File(output_filename, 'w') as h5file:
|
||||
|
||||
for item in paths:
|
||||
|
||||
root_dir = input_file_system_path
|
||||
|
||||
# Create dictionary with directory-files pairs where files satisfy keyword and admisible type contraints
|
||||
# It requires an extra pass over directory three and additional memory for dictionary, but it may be useful
|
||||
# to speed up subsequent step and prune resulting directory tree.
|
||||
|
||||
# For each directory and/or subdirectory, keep files that satisfy file_keyword constraints, and store
|
||||
# (directory_path, suitable files) relationships in a dictionary.
|
||||
file_paths_dict = {}
|
||||
|
||||
check_file_ext = lambda filename: any([ext in filename for ext in admissible_file_ext_list])
|
||||
|
||||
for dirpath, _, filenames in os.walk(item,topdown=False):
|
||||
file_paths_dict[dirpath] = []
|
||||
|
||||
# Check files that have an admissible extension and store them in admissible_filenames list
|
||||
admissible_filenames = []
|
||||
for fn in filenames:
|
||||
if check_file_ext(fn):
|
||||
admissible_filenames.append(fn)
|
||||
|
||||
if select_file_keywords: # when select_file_keywords = [], all files are considered
|
||||
for filename in admissible_filenames:
|
||||
# Do not consider files with types for which there is still no file_reader. TODO: extend file_reader library.
|
||||
#if not any([ext in filename for ext in admissible_file_ext_list]):
|
||||
# continue
|
||||
|
||||
# Add files whose name contains any of the file_keywords
|
||||
if any([keyword in filename for keyword in select_file_keywords]):
|
||||
file_paths_dict[dirpath].append(filename)
|
||||
else:
|
||||
file_paths_dict[dirpath] = admissible_filenames
|
||||
|
||||
for node_number, node in enumerate(os.walk(item, topdown=True)):
|
||||
|
||||
dirpath, dirnames, filenames_list = node
|
||||
@@ -277,26 +332,24 @@ def create_hdf5_file_from_filesystem_path(ofilename : str,
|
||||
# When select_file_keywords is empty, i.e., [], do not apply any filter on the filenames.
|
||||
|
||||
|
||||
filtered_filename_list = []
|
||||
if select_file_keywords:
|
||||
for filename in filenames_list:
|
||||
if any([keyword in filename for keyword in select_file_keywords]):
|
||||
filtered_filename_list.append(filename)
|
||||
else:
|
||||
filtered_filename_list = filenames_list.copy()
|
||||
#filtered_filename_list = []
|
||||
#if select_file_keywords:
|
||||
# for filename in filenames_list:
|
||||
# if any([keyword in filename for keyword in select_file_keywords]):
|
||||
# filtered_filename_list.append(filename)
|
||||
#else:
|
||||
# filtered_filename_list = filenames_list.copy()
|
||||
|
||||
admissible_file_ext_list = list(config_file.ext_to_reader_dict.keys())
|
||||
|
||||
for filename in filtered_filename_list.copy():
|
||||
if not any([ext in filename for ext in admissible_file_ext_list]):
|
||||
filtered_filename_list.remove(filename)
|
||||
filtered_filename_list = file_paths_dict.get(dirpath,filenames_list.copy())
|
||||
|
||||
|
||||
# Skip subdirectories that do not contain a keyword in the parameter 'select_dir_keywords' when it is nonempty
|
||||
if select_dir_keywords:
|
||||
#if (dirpath.count(os.sep) > offset) and not any([item in dirpath for item in select_dir_keywords]):
|
||||
#tail, dirname = os.path.split(dirpath)
|
||||
#if not any([item in dirname for item in select_dir_keywords]):
|
||||
if not any([item in dirpath for item in select_dir_keywords]):
|
||||
continue
|
||||
continue
|
||||
|
||||
group_name = dirpath.replace(os.sep,'/')
|
||||
group_name = group_name.replace(root_dir.replace(os.sep,'/') + '/', '/')
|
||||
@@ -308,46 +361,62 @@ def create_hdf5_file_from_filesystem_path(ofilename : str,
|
||||
|
||||
# TODO: for each "admissible" file in filenames, create an associated dataset in the corresponding group (subdirectory)
|
||||
|
||||
for filename in filtered_filename_list:
|
||||
for filenumber, filename in enumerate(filtered_filename_list):
|
||||
|
||||
# Get file extension (or file type)
|
||||
file_name, file_ext = os.path.splitext(filename)
|
||||
|
||||
#print(filename)
|
||||
|
||||
#try:
|
||||
if not 'h5' in filename:
|
||||
file_dict = config_file.ext_to_reader_dict[file_ext](os.path.join(dirpath,filename))
|
||||
file_dict = config_file.select_file_readers(group_id)[file_ext](os.path.join(dirpath,filename))
|
||||
|
||||
if not file_dict:
|
||||
continue
|
||||
|
||||
# Create group and add their attributes
|
||||
h5file[group_name].create_group(name=file_dict['name'])
|
||||
for key in file_dict['attributes_dict'].keys():
|
||||
|
||||
# Represent string values as fixed length strings in the HDF5 file, which need
|
||||
# to be decoded as string when we read them. It provides better control than variable strings,
|
||||
# at the expense of flexibility.
|
||||
# https://docs.h5py.org/en/stable/strings.html
|
||||
value = file_dict['attributes_dict'][key]
|
||||
if isinstance(value,str):
|
||||
utf8_type = h5py.string_dtype('utf-8', len(value))
|
||||
value = np.array(value.encode('utf-8'),dtype=utf8_type)
|
||||
try:
|
||||
# Create group and add their attributes
|
||||
h5file[group_name].create_group(name=file_dict['name'])
|
||||
for key in file_dict['attributes_dict'].keys():
|
||||
|
||||
# Represent string values as fixed length strings in the HDF5 file, which need
|
||||
# to be decoded as string when we read them. It provides better control than variable strings,
|
||||
# at the expense of flexibility.
|
||||
# https://docs.h5py.org/en/stable/strings.html
|
||||
value = file_dict['attributes_dict'][key]
|
||||
if isinstance(value,str):
|
||||
utf8_type = h5py.string_dtype('utf-8', len(value))
|
||||
value = np.array(value.encode('utf-8'),dtype=utf8_type)
|
||||
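# --- Illustrative sketch (not part of the committed code) of the fixed-length string round-trip
# used above; 'grp' and the attribute name 'campaign' are hypothetical stand-ins.
#
#   value = 'Iron_Sulphate'
#   utf8_type = h5py.string_dtype('utf-8', len(value))
#   grp.attrs.create(name='campaign', data=np.array(value.encode('utf-8'), dtype=utf8_type))
#   grp.attrs['campaign'].decode('utf-8')    # fixed-length strings read back as bytes -> decode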
|
||||
h5file[group_name][file_dict['name']].attrs.create(name=key,
|
||||
data=value)
|
||||
|
||||
# Add datasets to just created group
|
||||
for dataset in file_dict['datasets']:
|
||||
h5file[group_name][file_dict['name']].create_dataset(name = dataset['name'],
|
||||
data = dataset['data'],
|
||||
#dtype = file_dict['dtype'],
|
||||
shape = dataset['shape'])
|
||||
h5file[group_name][file_dict['name']].attrs.create(name=key,
|
||||
data=value)
|
||||
|
||||
# Add datasets to just created group
|
||||
for dataset in file_dict['datasets']:
|
||||
h5file[group_name][file_dict['name']].create_dataset(name = dataset['name'],
|
||||
data = dataset['data'],
|
||||
#dtype = file_dict['dtype'],
|
||||
shape = dataset['shape'])
|
||||
|
||||
except Exception as inst:
|
||||
# TODO: log when a file could not be stored as a dataset
|
||||
print(inst)
|
||||
|
||||
else:
|
||||
config_file.ext_to_reader_dict[file_ext](source_file_path = os.path.join(dirpath,filename),
|
||||
config_file.select_file_readers(group_id)[file_ext](source_file_path = os.path.join(dirpath,filename),
|
||||
dest_file_obj = h5file,
|
||||
dest_group_name = group_name +'/'+filename)
|
||||
print(file_ext, ':)')
|
||||
#print(filename,file_ext, ':)')
|
||||
|
||||
|
||||
progressBar(filenumber,len(filtered_filename_list), 'Uploading files in ' + dirpath)
|
||||
|
||||
|
||||
|
||||
output_yml_filename_path = hdf5_vis.take_yml_snapshot_of_hdf5_file(output_filename)
|
||||
|
||||
return output_filename, output_yml_filename_path
|
||||
|
||||
|
||||
|
||||
|
@@ -140,7 +140,7 @@ def print_metadata(name, obj, folder_depth, yaml_dict):
|
||||
#group_dict[obj.name]["name"] = obj.name
|
||||
#group_dict[obj.name]["attributes"] = attr_dict
|
||||
#group_dict[obj.name]["datasets"] = {}
|
||||
print(name)
|
||||
#print(name)
|
||||
|
||||
yaml_dict[obj.name] = group_dict
|
||||
elif isinstance(obj, h5py.Dataset):
|
||||
|
@@ -35,16 +35,26 @@ def get_review_status(filename_path):
|
||||
workflow_steps.append(line)
|
||||
return workflow_steps[-1]
|
||||
|
||||
def checkout_review_branch(repo_obj,branch_name):
|
||||
def checkout_review_branch(branch_name):
|
||||
# Create a new branch
|
||||
#branch_name = 'metadata-review-by-'+initials
|
||||
head_commit = repo_obj.head.peel()# Get the commit hash associated with HEAD
|
||||
|
||||
if not branch_name in repo_obj.branches:
|
||||
branch = repo_obj.create_branch(branch_name, head_commit)
|
||||
else:
|
||||
branch = repo_obj.branches[branch_name]
|
||||
repo_obj.checkout(branch)
|
||||
checkout_branch_command = lambda branch_name : ['git','checkout', branch_name]
|
||||
output = subprocess.run(checkout_branch_command(branch_name), capture_output=True,text=True,check=True)
|
||||
|
||||
print(output.stdout)
|
||||
|
||||
#if not branch_name in repo_obj.branches:
|
||||
# branch = repo_obj.create_branch(branch_name, head_commit)
|
||||
#else:
|
||||
# branch = repo_obj.branches[branch_name]
|
||||
#repo_obj.checkout(branch)
|
||||
|
||||
current_branch_command = ['git','branch','--show-current']
|
||||
status_command = ['git','status']
|
||||
add_command = lambda add_list: ['git','add'] + add_list
|
||||
commit_command = lambda message: ['git','commit','-m', message]
|
||||
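# --- Illustrative sketch (not part of the committed code): how the command helpers above are
# combined further down in this module (stage the review files, then commit them); subprocess
# is already used throughout this module.
def _example_stage_and_commit(paths_to_add, message='Initialized metadata review.'):
    subprocess.run(add_command(paths_to_add), capture_output=True, check=True)
    commit_output = subprocess.run(commit_command(message), capture_output=True, text=True, check=True)
    return commit_output.stdout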
|
||||
def first_initialize_metadata_review(hdf5_file_path, reviewer_attrs):
|
||||
|
||||
@@ -69,35 +79,55 @@ def first_initialize_metadata_review(hdf5_file_path, reviewer_attrs):
|
||||
raise ValueError("metadata review cannot be initialized. The associated .yaml file under review was not found. Run take_yml_snapshot_of_hdf5_file(filename_path) ")
|
||||
|
||||
# Initialize metadata review workflow
|
||||
print("Create branch metadata-review-by-"+initials+"\n")
|
||||
# print("Create branch metadata-review-by-"+initials+"\n")
|
||||
|
||||
#checkout_review_branch(branch_name)
|
||||
|
||||
# Check you are working at the right branch
|
||||
current_branch_command = ['git','branch','--show-current']
|
||||
curr_branch = subprocess.run(current_branch_command,capture_output=True,text=True,check=True)
|
||||
if not branch_name in curr_branch.stdout:
|
||||
raise ValueError("Please checkout the branch: "+branch_name+" via Git Bash Terminal while in the project's directory")
|
||||
|
||||
# Check if review file already exists and then check if it is still untracked
|
||||
review_yaml_file_path = os.path.join("review/",filename+YAML_EXT)
|
||||
|
||||
if not os.path.exists(review_yaml_file_path):
|
||||
review_yaml_file_path = utils.make_file_copy(os.path.join(hdf5_file_path_tail,filename+YAML_EXT), 'review')
|
||||
#else:
|
||||
# raise Warning("the file " + os.path.join("review/",filename+YAML_EXT)+ " already exists. Delete this file to reinitialize the metadata review process.")
|
||||
|
||||
|
||||
review_yaml_file_path_tail, ext = os.path.splitext(review_yaml_file_path)
|
||||
|
||||
with open(os.path.join(review_yaml_file_path_tail+"-review_status"+".txt"),'w') as f:
|
||||
f.write('under review')
|
||||
|
||||
checkout_review_branch(repo_obj, branch_name)
|
||||
# Stage review files and commit them to local repository
|
||||
status = subprocess.run(status_command,capture_output=True,text=True,check=True)
|
||||
untracked_files_for_review = []
|
||||
for line in status.stdout.splitlines():
|
||||
if 'review/' in line:  # status was run with text=True, so each line is already a str
|
||||
untracked_files_for_review.append(line)
|
||||
|
||||
status_dict = repo_obj.status()
|
||||
for filepath, file_status in status_dict.items():
|
||||
result = subprocess.run(add_command(untracked_files_for_review),capture_output=True,check=True)
|
||||
message = 'Initialized metadata review.'
|
||||
commit_output = subprocess.run(commit_command(message),capture_output=True,check=True)
|
||||
|
||||
print(commit_output.stdout)
|
||||
|
||||
|
||||
|
||||
|
||||
#status_dict = repo_obj.status()
|
||||
#for filepath, file_status in status_dict.items():
|
||||
# Identify keys associated to review files and stage them
|
||||
if 'review/'+filename in filepath:
|
||||
# if 'review/'+filename in filepath:
|
||||
# Stage changes
|
||||
repo_obj.index.add(filepath)
|
||||
# repo_obj.index.add(filepath)
|
||||
|
||||
author = config_file.author #default_signature
|
||||
committer = config_file.committer
|
||||
message = "Initialized metadata review process."
|
||||
tree = repo_obj.index.write_tree()
|
||||
oid = repo_obj.create_commit('HEAD', author, committer, message, tree, [repo_obj.head.peel().oid])
|
||||
#author = config_file.author #default_signature
|
||||
#committer = config_file.committer
|
||||
#message = "Initialized metadata review process."
|
||||
#tree = repo_obj.index.write_tree()
|
||||
#oid = repo_obj.create_commit('HEAD', author, committer, message, tree, [repo_obj.head.peel().oid])
|
||||
|
||||
#print("Add and commit"+"\n")
|
||||
|
||||
@@ -119,41 +149,33 @@ def second_submit_metadata_review(review_yaml_file_path, reviewer_attrs):
|
||||
initials = reviewer_attrs['initials']
|
||||
branch_name = '-'.join([reviewer_attrs['type'],'review','by',initials])
|
||||
# TODO: replace with subprocess + git
|
||||
checkout_review_branch(repo_obj, branch_name)
|
||||
#checkout_review_branch(repo_obj, branch_name)
|
||||
|
||||
# Check you are working at the right branch
|
||||
curr_branch = subprocess.run(current_branch_command,capture_output=True,text=True,check=True)
|
||||
if not branch_name in curr_branch.stdout:
|
||||
raise ValueError('Please checkout ' + branch_name + ' via Git Bash before submitting metadata review files. ')
|
||||
|
||||
#if any([status in get_review_status(filename_path) for status in ['under review','submitted']]):
|
||||
# filename_path_tail, filename_path_head = os.path.split(filename_path)
|
||||
# filename, ext = os.path.splitext(filename_path_head)
|
||||
# # TODO:
|
||||
|
||||
|
||||
##
|
||||
status_command = ['git','status']
|
||||
add_command = lambda add_list: ['git','add'] + add_list
|
||||
commit_command = lambda message: ['git','commit','-m', message]
|
||||
#push_command = lambda repository,refspec: ['git','push',repository,refspec]
|
||||
|
||||
# Collect modified review files
|
||||
status = subprocess.run(status_command,capture_output=True,check=True)
|
||||
|
||||
files_to_add_list = []
|
||||
modified_files = []
|
||||
for line in status.stdout.splitlines():
|
||||
# convert line from bytes to str
|
||||
tmp = line.decode("utf-8")
|
||||
if 'modified' in tmp and review_yaml_file_path in tmp:
|
||||
files_to_add_list.append(tmp.split()[1])
|
||||
##
|
||||
modified_files.append(tmp.split()[1])
|
||||
|
||||
# Stage modified files and commit them to local repository
|
||||
review_yaml_file_path_tail, review_yaml_file_path_head = os.path.split(review_yaml_file_path)
|
||||
filename, ext = os.path.splitext(review_yaml_file_path_head)
|
||||
if files_to_add_list:
|
||||
if modified_files:
|
||||
review_status_file_path = os.path.join("review/",filename+"-review_status"+TXT_EXT)
|
||||
with open(review_status_file_path,'a') as f:
|
||||
f.write('\nsubmitted')
|
||||
|
||||
files_to_add_list.append(review_status_file_path)
|
||||
modified_files.append(review_status_file_path)
|
||||
|
||||
result = subprocess.run(add_command(files_to_add_list),capture_output=True,check=True)
|
||||
result = subprocess.run(add_command(modified_files),capture_output=True,check=True)
|
||||
message = 'Submitted metadata review.'
|
||||
commit_output = subprocess.run(commit_command(message),capture_output=True,check=True)
|
||||
|
||||
@@ -163,20 +185,6 @@ def second_submit_metadata_review(review_yaml_file_path, reviewer_attrs):
|
||||
print('Nothing to commit.')
|
||||
|
||||
|
||||
#status_dict = repo_obj.status()
|
||||
#for filepath, file_status in status_dict.items():
|
||||
# Identify keys associated to review files and stage them
|
||||
# if ('review/'+filename in filepath) and (file_status == pygit.GIT_STATUS_WT_MODIFIED):
|
||||
# Stage changes
|
||||
# repo_obj.index.add(filepath)
|
||||
|
||||
#author = config_file.author #default_signature
|
||||
#committer = config_file.committer
|
||||
#message = "Submitted metadata review."
|
||||
#tree = repo_obj.index.write_tree()
|
||||
#oid = repo_obj.create_commit('HEAD', author, committer, message, tree, [repo_obj.head.peel().oid])
|
||||
|
||||
|
||||
|
||||
def third_complete_metadata_review(reviewer_attrs):
|
||||
|
||||
|
@@ -1,41 +1,55 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Import python packages and modules"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import sys\n",
|
||||
"import os\n",
|
||||
"# Set up project root directory\n",
|
||||
"root_dir = os.path.abspath(os.curdir)\n",
|
||||
"sys.path.append(root_dir)\n",
|
||||
"\n",
|
||||
"import src.hdf5_vis as hdf5_vis\n",
|
||||
"import src.hdf5_lib as hdf5_lib\n",
|
||||
"import input_files.config_file as config_file\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"output_dir = 'output_files/'\n",
|
||||
"group_id = '5505'#'smog_chamber'#'5505'\n",
|
||||
"user_initials = 'LL' #'NG' #'LL' # 'TBR'\n",
|
||||
"\n",
|
||||
"group_id = 'smog_chamber'#'5505'\n",
|
||||
"user_initials = 'NG'#'LL' #'NG' #'LL' # 'TBR'\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Define input file path and keywords\n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"2 26\n",
|
||||
".dat :)\n",
|
||||
"2 26\n",
|
||||
".dat :)\n",
|
||||
"2 26\n",
|
||||
".dat :)\n",
|
||||
"2 26\n",
|
||||
".dat :)\n",
|
||||
"Humidity_Sensors\n",
|
||||
"Humidity_Sensors/2022\n",
|
||||
"Humidity_Sensors/2022/01_Januar\n",
|
||||
"Humidity_Sensors/2022/02_Februar\n",
|
||||
"Humidity_Sensors/2022/03_März\n",
|
||||
"Humidity_Sensors/2022/04_April\n",
|
||||
"Humidity_Sensors/2022/05_Mai\n",
|
||||
"Humidity_Sensors/2022/06_Juni\n",
|
||||
"Humidity_Sensors/2022/07_Juli\n",
|
||||
"Humidity_Sensors/2022/10_Oktober\n",
|
||||
"Humidity_Sensors/2022/11_November\n",
|
||||
"Humidity_Sensors/2022/12_Dezember\n",
|
||||
"ICAD\n",
|
||||
"ICAD/HONO\n",
|
||||
"ICAD/HONO/2022\n",
|
||||
"ICAD/HONO_prototype\n",
|
||||
"ICAD/HONO_prototype/2022\n",
|
||||
"ICAD/NO2\n",
|
||||
"ICAD/NO2/2022\n"
|
||||
"[==================================================--------------------------------------------------] 50.0% ...Uploading files in \\\\fs03\\Iron_Sulphate\\smps\\20220726\r"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -49,237 +63,113 @@
|
||||
"branchvalues": "remainder",
|
||||
"customdata": [
|
||||
"<br>",
|
||||
"/Humidity_Sensors",
|
||||
"/Humidity_Sensors/2022",
|
||||
"/Humidity_Sensors/2022/01_Januar",
|
||||
"/Humidity_Sensors/2022/01_Januar/2022-01-31_09.37.56_PC14751_Humidity_Sensors.dat",
|
||||
"/Humidity_Sensors/2022/01_Januar/2022-01-31_09.37.56_PC14751_Humidity_Sensors.dat/categorial_variable_names",
|
||||
"/Humidity_Sensors/2022/01_Januar/2022-01-31_09.37.56_PC14751_Humidity_Sensors.dat/categorical_variables",
|
||||
"/Humidity_Sensors/2022/01_Januar/2022-01-31_09.37.56_PC14751_Humidity_Sensors.dat/numerical_variable_names",
|
||||
"/Humidity_Sensors/2022/01_Januar/2022-01-31_09.37.56_PC14751_Humidity_Sensors.dat/numerical_variables",
|
||||
"/Humidity_Sensors/2022/02_Februar",
|
||||
"/Humidity_Sensors/2022/02_Februar/2022-02-11_09.07.50_PC14751_Humidity_Sensors.dat",
|
||||
"/Humidity_Sensors/2022/02_Februar/2022-02-11_09.07.50_PC14751_Humidity_Sensors.dat/categorial_variable_names",
|
||||
"/Humidity_Sensors/2022/02_Februar/2022-02-11_09.07.50_PC14751_Humidity_Sensors.dat/categorical_variables",
|
||||
"/Humidity_Sensors/2022/02_Februar/2022-02-11_09.07.50_PC14751_Humidity_Sensors.dat/numerical_variable_names",
|
||||
"/Humidity_Sensors/2022/02_Februar/2022-02-11_09.07.50_PC14751_Humidity_Sensors.dat/numerical_variables",
|
||||
"/Humidity_Sensors/2022/02_Februar/2022-02-11_16.46.26_PC10228_Humidity_Sensors.dat",
|
||||
"/Humidity_Sensors/2022/02_Februar/2022-02-11_16.46.26_PC10228_Humidity_Sensors.dat/categorial_variable_names",
|
||||
"/Humidity_Sensors/2022/02_Februar/2022-02-11_16.46.26_PC10228_Humidity_Sensors.dat/categorical_variables",
|
||||
"/Humidity_Sensors/2022/02_Februar/2022-02-11_16.46.26_PC10228_Humidity_Sensors.dat/numerical_variable_names",
|
||||
"/Humidity_Sensors/2022/02_Februar/2022-02-11_16.46.26_PC10228_Humidity_Sensors.dat/numerical_variables",
|
||||
"/Humidity_Sensors/2022/03_März",
|
||||
"/Humidity_Sensors/2022/03_März/2022-03-14_09.05.01_PC14751_Humidity_Sensors.dat",
|
||||
"/Humidity_Sensors/2022/03_März/2022-03-14_09.05.01_PC14751_Humidity_Sensors.dat/categorial_variable_names",
|
||||
"/Humidity_Sensors/2022/03_März/2022-03-14_09.05.01_PC14751_Humidity_Sensors.dat/categorical_variables",
|
||||
"/Humidity_Sensors/2022/03_März/2022-03-14_09.05.01_PC14751_Humidity_Sensors.dat/numerical_variable_names",
|
||||
"/Humidity_Sensors/2022/03_März/2022-03-14_09.05.01_PC14751_Humidity_Sensors.dat/numerical_variables",
|
||||
"/Humidity_Sensors/2022/04_April",
|
||||
"/Humidity_Sensors/2022/05_Mai",
|
||||
"/Humidity_Sensors/2022/06_Juni",
|
||||
"/Humidity_Sensors/2022/07_Juli",
|
||||
"/Humidity_Sensors/2022/10_Oktober",
|
||||
"/Humidity_Sensors/2022/11_November",
|
||||
"/Humidity_Sensors/2022/12_Dezember",
|
||||
"/ICAD",
|
||||
"/ICAD/HONO",
|
||||
"/ICAD/HONO/2022",
|
||||
"/ICAD/HONO/2022/10_Oct",
|
||||
"/ICAD/HONO/2022/11_Nov",
|
||||
"/ICAD/HONO/2022/12_Dec",
|
||||
"/ICAD/HONO_prototype",
|
||||
"/ICAD/HONO_prototype/2022",
|
||||
"/ICAD/HONO_prototype/2022/01_Jan",
|
||||
"/ICAD/HONO_prototype/2022/02_Feb",
|
||||
"/ICAD/NO2",
|
||||
"/ICAD/NO2/2022",
|
||||
"/ICAD/NO2/2022/01_Jan",
|
||||
"/ICAD/NO2/2022/02_Feb",
|
||||
"/ICAD/NO2/2022/03_Mar",
|
||||
"/ICAD/NO2/2022/04_Apr",
|
||||
"/ICAD/NO2/2022/05_May",
|
||||
"/ICAD/NO2/2022/06_June",
|
||||
"/ICAD/NO2/2022/07_July",
|
||||
"/ICAD/NO2/2022/10_Oct",
|
||||
"/ICAD/NO2/2022/11_Nov",
|
||||
"/ICAD/NO2/2022/12_Dec"
|
||||
"/gas",
|
||||
"/gas/20220726_000004_MSC_gases.txt",
|
||||
"/gas/20220726_000004_MSC_gases.txt/categorial_variable_names",
|
||||
"/gas/20220726_000004_MSC_gases.txt/categorical_variables",
|
||||
"/gas/20220726_000004_MSC_gases.txt/numerical_variable_names",
|
||||
"/gas/20220726_000004_MSC_gases.txt/numerical_variables",
|
||||
"/gas/20220726_101617_MSC_gases.txt",
|
||||
"/gas/20220726_101617_MSC_gases.txt/categorial_variable_names",
|
||||
"/gas/20220726_101617_MSC_gases.txt/categorical_variables",
|
||||
"/gas/20220726_101617_MSC_gases.txt/numerical_variable_names",
|
||||
"/gas/20220726_101617_MSC_gases.txt/numerical_variables",
|
||||
"/smps",
|
||||
"/smps/20220726",
|
||||
"/smps/20220726/20220726_mass.TXT",
|
||||
"/smps/20220726/20220726_mass.TXT/categorial_variable_names",
|
||||
"/smps/20220726/20220726_mass.TXT/categorical_variables",
|
||||
"/smps/20220726/20220726_mass.TXT/numerical_variable_names",
|
||||
"/smps/20220726/20220726_mass.TXT/numerical_variables",
|
||||
"/smps/20220726/20220726_num.TXT",
|
||||
"/smps/20220726/20220726_num.TXT/categorial_variable_names",
|
||||
"/smps/20220726/20220726_num.TXT/categorical_variables",
|
||||
"/smps/20220726/20220726_num.TXT/numerical_variable_names",
|
||||
"/smps/20220726/20220726_num.TXT/numerical_variables"
|
||||
],
|
||||
"hovertemplate": "<b>%{label} </b> <br> Count: %{value} <br> Path: %{customdata}",
|
||||
"labels": [
|
||||
"/",
|
||||
"/Humidity_Sensors",
|
||||
"/Humidity_Sensors/2022",
|
||||
"/Humidity_Sensors/2022/01_Januar",
|
||||
"/Humidity_Sensors/2022/01_Januar/2022-01-31_09.37.56_PC14751_Humidity_Sensors.dat",
|
||||
"/Humidity_Sensors/2022/01_Januar/2022-01-31_09.37.56_PC14751_Humidity_Sensors.dat/categorial_variable_names",
|
||||
"/Humidity_Sensors/2022/01_Januar/2022-01-31_09.37.56_PC14751_Humidity_Sensors.dat/categorical_variables",
|
||||
"/Humidity_Sensors/2022/01_Januar/2022-01-31_09.37.56_PC14751_Humidity_Sensors.dat/numerical_variable_names",
|
||||
"/Humidity_Sensors/2022/01_Januar/2022-01-31_09.37.56_PC14751_Humidity_Sensors.dat/numerical_variables",
|
||||
"/Humidity_Sensors/2022/02_Februar",
|
||||
"/Humidity_Sensors/2022/02_Februar/2022-02-11_09.07.50_PC14751_Humidity_Sensors.dat",
|
||||
"/Humidity_Sensors/2022/02_Februar/2022-02-11_09.07.50_PC14751_Humidity_Sensors.dat/categorial_variable_names",
|
||||
"/Humidity_Sensors/2022/02_Februar/2022-02-11_09.07.50_PC14751_Humidity_Sensors.dat/categorical_variables",
|
||||
"/Humidity_Sensors/2022/02_Februar/2022-02-11_09.07.50_PC14751_Humidity_Sensors.dat/numerical_variable_names",
|
||||
"/Humidity_Sensors/2022/02_Februar/2022-02-11_09.07.50_PC14751_Humidity_Sensors.dat/numerical_variables",
|
||||
"/Humidity_Sensors/2022/02_Februar/2022-02-11_16.46.26_PC10228_Humidity_Sensors.dat",
|
||||
"/Humidity_Sensors/2022/02_Februar/2022-02-11_16.46.26_PC10228_Humidity_Sensors.dat/categorial_variable_names",
|
||||
"/Humidity_Sensors/2022/02_Februar/2022-02-11_16.46.26_PC10228_Humidity_Sensors.dat/categorical_variables",
|
||||
"/Humidity_Sensors/2022/02_Februar/2022-02-11_16.46.26_PC10228_Humidity_Sensors.dat/numerical_variable_names",
|
||||
"/Humidity_Sensors/2022/02_Februar/2022-02-11_16.46.26_PC10228_Humidity_Sensors.dat/numerical_variables",
|
||||
"/Humidity_Sensors/2022/03_März",
|
||||
"/Humidity_Sensors/2022/03_März/2022-03-14_09.05.01_PC14751_Humidity_Sensors.dat",
|
||||
"/Humidity_Sensors/2022/03_März/2022-03-14_09.05.01_PC14751_Humidity_Sensors.dat/categorial_variable_names",
|
||||
"/Humidity_Sensors/2022/03_März/2022-03-14_09.05.01_PC14751_Humidity_Sensors.dat/categorical_variables",
|
||||
"/Humidity_Sensors/2022/03_März/2022-03-14_09.05.01_PC14751_Humidity_Sensors.dat/numerical_variable_names",
|
||||
"/Humidity_Sensors/2022/03_März/2022-03-14_09.05.01_PC14751_Humidity_Sensors.dat/numerical_variables",
|
||||
"/Humidity_Sensors/2022/04_April",
|
||||
"/Humidity_Sensors/2022/05_Mai",
|
||||
"/Humidity_Sensors/2022/06_Juni",
|
||||
"/Humidity_Sensors/2022/07_Juli",
|
||||
"/Humidity_Sensors/2022/10_Oktober",
|
||||
"/Humidity_Sensors/2022/11_November",
|
||||
"/Humidity_Sensors/2022/12_Dezember",
|
||||
"/ICAD",
|
||||
"/ICAD/HONO",
|
||||
"/ICAD/HONO/2022",
|
||||
"/ICAD/HONO/2022/10_Oct",
|
||||
"/ICAD/HONO/2022/11_Nov",
|
||||
"/ICAD/HONO/2022/12_Dec",
|
||||
"/ICAD/HONO_prototype",
|
||||
"/ICAD/HONO_prototype/2022",
|
||||
"/ICAD/HONO_prototype/2022/01_Jan",
|
||||
"/ICAD/HONO_prototype/2022/02_Feb",
|
||||
"/ICAD/NO2",
|
||||
"/ICAD/NO2/2022",
|
||||
"/ICAD/NO2/2022/01_Jan",
|
||||
"/ICAD/NO2/2022/02_Feb",
|
||||
"/ICAD/NO2/2022/03_Mar",
|
||||
"/ICAD/NO2/2022/04_Apr",
|
||||
"/ICAD/NO2/2022/05_May",
|
||||
"/ICAD/NO2/2022/06_June",
|
||||
"/ICAD/NO2/2022/07_July",
|
||||
"/ICAD/NO2/2022/10_Oct",
|
||||
"/ICAD/NO2/2022/11_Nov",
|
||||
"/ICAD/NO2/2022/12_Dec"
|
||||
"/gas",
|
||||
"/gas/20220726_000004_MSC_gases.txt",
|
||||
"/gas/20220726_000004_MSC_gases.txt/categorial_variable_names",
|
||||
"/gas/20220726_000004_MSC_gases.txt/categorical_variables",
|
||||
"/gas/20220726_000004_MSC_gases.txt/numerical_variable_names",
|
||||
"/gas/20220726_000004_MSC_gases.txt/numerical_variables",
|
||||
"/gas/20220726_101617_MSC_gases.txt",
|
||||
"/gas/20220726_101617_MSC_gases.txt/categorial_variable_names",
|
||||
"/gas/20220726_101617_MSC_gases.txt/categorical_variables",
|
||||
"/gas/20220726_101617_MSC_gases.txt/numerical_variable_names",
|
||||
"/gas/20220726_101617_MSC_gases.txt/numerical_variables",
|
||||
"/smps",
|
||||
"/smps/20220726",
|
||||
"/smps/20220726/20220726_mass.TXT",
|
||||
"/smps/20220726/20220726_mass.TXT/categorial_variable_names",
|
||||
"/smps/20220726/20220726_mass.TXT/categorical_variables",
|
||||
"/smps/20220726/20220726_mass.TXT/numerical_variable_names",
|
||||
"/smps/20220726/20220726_mass.TXT/numerical_variables",
|
||||
"/smps/20220726/20220726_num.TXT",
|
||||
"/smps/20220726/20220726_num.TXT/categorial_variable_names",
|
||||
"/smps/20220726/20220726_num.TXT/categorical_variables",
|
||||
"/smps/20220726/20220726_num.TXT/numerical_variable_names",
|
||||
"/smps/20220726/20220726_num.TXT/numerical_variables"
|
||||
],
|
||||
"name": "",
|
||||
"parents": [
|
||||
"",
|
||||
"/",
|
||||
"/Humidity_Sensors",
|
||||
"/Humidity_Sensors/2022",
|
||||
"/Humidity_Sensors/2022/01_Januar",
|
||||
"/Humidity_Sensors/2022/01_Januar/2022-01-31_09.37.56_PC14751_Humidity_Sensors.dat",
|
||||
"/Humidity_Sensors/2022/01_Januar/2022-01-31_09.37.56_PC14751_Humidity_Sensors.dat",
|
||||
"/Humidity_Sensors/2022/01_Januar/2022-01-31_09.37.56_PC14751_Humidity_Sensors.dat",
|
||||
"/Humidity_Sensors/2022/01_Januar/2022-01-31_09.37.56_PC14751_Humidity_Sensors.dat",
|
||||
"/Humidity_Sensors/2022",
|
||||
"/Humidity_Sensors/2022/02_Februar",
|
||||
"/Humidity_Sensors/2022/02_Februar/2022-02-11_09.07.50_PC14751_Humidity_Sensors.dat",
|
||||
"/Humidity_Sensors/2022/02_Februar/2022-02-11_09.07.50_PC14751_Humidity_Sensors.dat",
|
||||
"/Humidity_Sensors/2022/02_Februar/2022-02-11_09.07.50_PC14751_Humidity_Sensors.dat",
|
||||
"/Humidity_Sensors/2022/02_Februar/2022-02-11_09.07.50_PC14751_Humidity_Sensors.dat",
|
||||
"/Humidity_Sensors/2022/02_Februar",
|
||||
"/Humidity_Sensors/2022/02_Februar/2022-02-11_16.46.26_PC10228_Humidity_Sensors.dat",
|
||||
"/Humidity_Sensors/2022/02_Februar/2022-02-11_16.46.26_PC10228_Humidity_Sensors.dat",
|
||||
"/Humidity_Sensors/2022/02_Februar/2022-02-11_16.46.26_PC10228_Humidity_Sensors.dat",
|
||||
"/Humidity_Sensors/2022/02_Februar/2022-02-11_16.46.26_PC10228_Humidity_Sensors.dat",
|
||||
"/Humidity_Sensors/2022",
|
||||
"/Humidity_Sensors/2022/03_März",
|
||||
"/Humidity_Sensors/2022/03_März/2022-03-14_09.05.01_PC14751_Humidity_Sensors.dat",
|
||||
"/Humidity_Sensors/2022/03_März/2022-03-14_09.05.01_PC14751_Humidity_Sensors.dat",
|
||||
"/Humidity_Sensors/2022/03_März/2022-03-14_09.05.01_PC14751_Humidity_Sensors.dat",
|
||||
"/Humidity_Sensors/2022/03_März/2022-03-14_09.05.01_PC14751_Humidity_Sensors.dat",
|
||||
"/Humidity_Sensors/2022",
|
||||
"/Humidity_Sensors/2022",
|
||||
"/Humidity_Sensors/2022",
|
||||
"/Humidity_Sensors/2022",
|
||||
"/Humidity_Sensors/2022",
|
||||
"/Humidity_Sensors/2022",
|
||||
"/Humidity_Sensors/2022",
|
||||
"/gas",
|
||||
"/gas/20220726_000004_MSC_gases.txt",
|
||||
"/gas/20220726_000004_MSC_gases.txt",
|
||||
"/gas/20220726_000004_MSC_gases.txt",
|
||||
"/gas/20220726_000004_MSC_gases.txt",
|
||||
"/gas",
|
||||
"/gas/20220726_101617_MSC_gases.txt",
|
||||
"/gas/20220726_101617_MSC_gases.txt",
|
||||
"/gas/20220726_101617_MSC_gases.txt",
|
||||
"/gas/20220726_101617_MSC_gases.txt",
|
||||
"/",
|
||||
"/ICAD",
|
||||
"/ICAD/HONO",
|
||||
"/ICAD/HONO/2022",
|
||||
"/ICAD/HONO/2022",
|
||||
"/ICAD/HONO/2022",
|
||||
"/ICAD",
|
||||
"/ICAD/HONO_prototype",
|
||||
"/ICAD/HONO_prototype/2022",
|
||||
"/ICAD/HONO_prototype/2022",
|
||||
"/ICAD",
|
||||
"/ICAD/NO2",
|
||||
"/ICAD/NO2/2022",
|
||||
"/ICAD/NO2/2022",
|
||||
"/ICAD/NO2/2022",
|
||||
"/ICAD/NO2/2022",
|
||||
"/ICAD/NO2/2022",
|
||||
"/ICAD/NO2/2022",
|
||||
"/ICAD/NO2/2022",
|
||||
"/ICAD/NO2/2022",
|
||||
"/ICAD/NO2/2022",
|
||||
"/ICAD/NO2/2022"
|
||||
"/smps",
|
||||
"/smps/20220726",
|
||||
"/smps/20220726/20220726_mass.TXT",
|
||||
"/smps/20220726/20220726_mass.TXT",
|
||||
"/smps/20220726/20220726_mass.TXT",
|
||||
"/smps/20220726/20220726_mass.TXT",
|
||||
"/smps/20220726",
|
||||
"/smps/20220726/20220726_num.TXT",
|
||||
"/smps/20220726/20220726_num.TXT",
|
||||
"/smps/20220726/20220726_num.TXT",
|
||||
"/smps/20220726/20220726_num.TXT"
|
||||
],
|
||||
"root": {
|
||||
"color": "lightgrey"
|
||||
},
|
||||
"type": "treemap",
|
||||
"values": [
|
||||
1,
|
||||
1,
|
||||
0,
|
||||
5,
|
||||
1,
|
||||
1,
|
||||
1,
|
||||
1,
|
||||
1,
|
||||
14,
|
||||
1,
|
||||
1,
|
||||
1,
|
||||
1,
|
||||
1,
|
||||
1,
|
||||
1,
|
||||
1,
|
||||
1,
|
||||
1,
|
||||
6,
|
||||
1,
|
||||
2,
|
||||
2,
|
||||
4,
|
||||
1,
|
||||
1,
|
||||
1,
|
||||
1,
|
||||
4,
|
||||
9,
|
||||
11,
|
||||
3,
|
||||
8,
|
||||
17,
|
||||
1,
|
||||
1,
|
||||
1,
|
||||
1,
|
||||
1,
|
||||
2,
|
||||
1,
|
||||
1,
|
||||
0,
|
||||
7,
|
||||
8,
|
||||
2,
|
||||
1,
|
||||
1,
|
||||
3,
|
||||
6,
|
||||
1,
|
||||
0,
|
||||
3,
|
||||
6,
|
||||
5,
|
||||
3,
|
||||
4,
|
||||
6,
|
||||
2,
|
||||
5,
|
||||
8,
|
||||
2
|
||||
1,
|
||||
1,
|
||||
1,
|
||||
1,
|
||||
4,
|
||||
1,
|
||||
1,
|
||||
1,
|
||||
1
|
||||
]
|
||||
}
|
||||
],
|
||||
@@ -1116,18 +1006,37 @@
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"import sys\n",
|
||||
"import os\n",
|
||||
"root_dir = os.path.abspath(os.curdir)\n",
|
||||
"sys.path.append(root_dir)\n",
|
||||
"#input_file_dir = '//fs101/5505/People/Juan/TypicalBeamTime'\n",
|
||||
"#select_file_keywords=[]\n",
|
||||
"#select_dir_keywords = ['NEXAFS', 'Notes', 'Photos', 'Pressure', 'RGA', 'SES']\n",
|
||||
"\n",
|
||||
"import src.hdf5_vis as hdf5_vis\n",
|
||||
"import src.hdf5_lib as hdf5_lib\n",
|
||||
"\n",
|
||||
"output_filename_path, output_yml_filename_path = hdf5_lib.main()\n",
|
||||
"#input_file_dir = '//fs101/5505/Data' \n",
|
||||
"#select_dir_keywords = ['Lopap', 'Humidity_Sensors/2022', 'ICAD/HONO/2022', 'ICAD/NO2/2022', 'T200_NOX', 'T360U_CO2']\n",
|
||||
"#select_file_keywords = ['2022-03-25','2022_03_25','20220325']\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"input_file_dir = '//fs03/Iron_Sulphate'\n",
|
||||
"select_dir_keywords = ['gas','smps/20220726']#,'htof/2022.07.26','ptr/2022.07.26','ams/2022.07.26']\n",
|
||||
"#select_dir_keywords = ['htof','ams', 'ptr', 'gas','smps'] \n",
|
||||
"\n",
|
||||
"select_file_keywords = ['20220726','2022.07.26']\n",
|
||||
"\n",
|
||||
"config_param = {'group_id' : group_id, 'user_initials' : user_initials, 'output_dir': output_dir}\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"output_filename_path, output_yml_filename_path = hdf5_lib.create_hdf5_file_from_filesystem_path(config_param,\n",
|
||||
" input_file_dir,\n",
|
||||
" select_dir_keywords,\n",
|
||||
" select_file_keywords)\n",
|
||||
"\n",
|
||||
"hdf5_vis.display_group_hierarchy_on_a_treemap(output_filename_path)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
|
workflow_data_owner_review.ipynb: new file, 160 lines
@@ -0,0 +1,160 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Import libraries and modules\n",
|
||||
"\n",
|
||||
"* Excecute (or Run) Cell"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import sys\n",
|
||||
"import os\n",
|
||||
"root_dir = os.path.abspath(os.curdir)\n",
|
||||
"sys.path.append(root_dir)\n",
|
||||
"\n",
|
||||
"import src.metadata_review_lib as metadata_review_lib"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Step 1: initialize metadata review.\n",
|
||||
"\n",
|
||||
"* Specify hdf5 file whose metadata is to be reviewed by editing the string variable `hdf5_file_path`.\n",
|
||||
"* Edit reviewer attributes, i.e., the dict variable `reviewer_attrs` with your own initials and role.\n",
|
||||
"* Excecute Cell."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Create branch metadata-review-by-NG\n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"ename": "ValueError",
|
||||
"evalue": "('Please checkout the branch: ', 'data-owner-review-by-NG', \" via Git Bash Terminal while in the project's directory\")",
|
||||
"output_type": "error",
|
||||
"traceback": [
|
||||
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
|
||||
"\u001b[1;31mValueError\u001b[0m Traceback (most recent call last)",
|
||||
"Cell \u001b[1;32mIn[2], line 12\u001b[0m\n\u001b[0;32m 7\u001b[0m reviewer_attrs \u001b[38;5;241m=\u001b[39m {\u001b[38;5;124m'\u001b[39m\u001b[38;5;124minitials\u001b[39m\u001b[38;5;124m'\u001b[39m: \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mNG\u001b[39m\u001b[38;5;124m'\u001b[39m,\n\u001b[0;32m 8\u001b[0m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mtype\u001b[39m\u001b[38;5;124m'\u001b[39m: \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mdata-owner\u001b[39m\u001b[38;5;124m'\u001b[39m}\n\u001b[0;32m 10\u001b[0m \u001b[38;5;66;03m#output_filename_path, output_yml_filename_path = hdf5_lib.main()\u001b[39;00m\n\u001b[1;32m---> 12\u001b[0m review_yaml_file_path \u001b[38;5;241m=\u001b[39m \u001b[43mmetadata_review_lib\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfirst_initialize_metadata_review\u001b[49m\u001b[43m(\u001b[49m\u001b[43mhdf5_file_path\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mreviewer_attrs\u001b[49m\u001b[43m)\u001b[49m \n\u001b[0;32m 14\u001b[0m \u001b[38;5;28mprint\u001b[39m(review_yaml_file_path) \n",
|
||||
"File \u001b[1;32mc:\\Users\\florez_j\\Documents\\GitLab\\functionspython\\src\\metadata_review_lib.py:89\u001b[0m, in \u001b[0;36mfirst_initialize_metadata_review\u001b[1;34m(hdf5_file_path, reviewer_attrs)\u001b[0m\n\u001b[0;32m 87\u001b[0m curr_branch \u001b[38;5;241m=\u001b[39m subprocess\u001b[38;5;241m.\u001b[39mrun(current_branch_command,capture_output\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m,text\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m,check\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m)\n\u001b[0;32m 88\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m branch_name \u001b[38;5;129;01min\u001b[39;00m curr_branch\u001b[38;5;241m.\u001b[39mstdout:\n\u001b[1;32m---> 89\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mPlease checkout the branch: \u001b[39m\u001b[38;5;124m\"\u001b[39m,branch_name,\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m via Git Bash Terminal while in the project\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124ms directory\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m 91\u001b[0m \u001b[38;5;66;03m# Check if review file already exists and then check if it is still untracked\u001b[39;00m\n\u001b[0;32m 92\u001b[0m review_yaml_file_path \u001b[38;5;241m=\u001b[39m os\u001b[38;5;241m.\u001b[39mpath\u001b[38;5;241m.\u001b[39mjoin(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mreview/\u001b[39m\u001b[38;5;124m\"\u001b[39m,filename\u001b[38;5;241m+\u001b[39mYAML_EXT)\n",
|
||||
"\u001b[1;31mValueError\u001b[0m: ('Please checkout the branch: ', 'data-owner-review-by-NG', \" via Git Bash Terminal while in the project's directory\")"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"\n",
|
||||
"#hdf5_file_path = \"output_files/unified_file_smog_chamber_2024-03-25_UTC-OFST_+0100_NG.h5\"\n",
|
||||
"#yml_file_path = \"output_files/unified_file_smog_chamber_2024-03-25_UTC-OFST_+0100_NG.yaml\"\n",
|
||||
"\n",
|
||||
"hdf5_file_path = \"output_files/unified_file_smog_chamber_2024-04-03_UTC-OFST_+0200_NG.h5\"\n",
|
||||
"yml_file_path = \"output_files/unified_file_smog_chamber_2024-04-03_UTC-OFST_+0200_NG.yaml\"\n",
|
||||
"\n",
|
||||
"reviewer_attrs = {'initials': 'NG',\n",
|
||||
" 'type': 'data-owner'}\n",
|
||||
"\n",
|
||||
"#output_filename_path, output_yml_filename_path = hdf5_lib.main()\n",
|
||||
"\n",
|
||||
"review_yaml_file_path = metadata_review_lib.first_initialize_metadata_review(hdf5_file_path, reviewer_attrs) \n",
|
||||
"\n",
|
||||
"print(review_yaml_file_path) "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Step 2: Submit metadata review. \n",
|
||||
"\n",
|
||||
"* Edit yaml file in review folder and save changes\n",
|
||||
"* Excecute Cell."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"metadata_review_lib.second_submit_metadata_review(review_yaml_file_path,reviewer_attrs)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Step 3: Update hdf5 file metadata w/ submitted review yaml file.\n",
|
||||
"\n",
|
||||
"* Make sure previous step was carried out properly.\n",
|
||||
"* Excecute Cell."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"\n",
|
||||
"metadata_review_lib.third_update_hdf5_file_with_review(hdf5_file_path, review_yaml_file_path, reviewer_attrs)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Step 4: Complete data-owner review. Update remote repository\n",
|
||||
"\n",
|
||||
"* Excecute Cell."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"metadata_review_lib.fourth_complete_metadata_review(reviewer_attrs)"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "test_atmos_chem_env",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.6"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
workflow_metadata_reviewer.ipynb: new file, 18 lines
@@ -0,0 +1,18 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"language_info": {
|
||||
"name": "python"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|