Resolved incoming changes from main branch
.gitignore (vendored): 3 changes
@@ -1,4 +1,5 @@
*.pyc
__pycache__/
*.h5
tmp_files/
tmp_files/
*.ipynb
@@ -14,85 +14,32 @@ import pygit2 as pygit
|
||||
author = pygit.Signature('Florez Ospina Juan Felipe', 'juan.florez-ospina@psi.ch')
|
||||
committer = pygit.Signature('Florez Ospina Juan Felipe', 'juan.florez-ospina@psi.ch')
|
||||
|
||||
group_id = 'smog_chamber'
|
||||
|
||||
#group_id = '5505'
|
||||
|
||||
output_filename_tempate = lambda group_id, timestamp,user_initials : '_'.join(['unified_file',group_id,timestamp,user_initials])+'.h5'
|
||||
|
||||
def created_at():
|
||||
|
||||
now = datetime.now()
|
||||
# Populate the now object with time zone information obtained from the local system
|
||||
now_tz_aware = now.astimezone()
|
||||
tz = now_tz_aware.strftime('%z')
|
||||
#created_at = now_tz_aware.strftime('%Y-%m-%d_%H-%M-%S')+'_utcoffset_' + tz
|
||||
created_at = now_tz_aware.strftime('%Y-%m-%d')+'_UTC-OFST_' + tz
|
||||
# Make created at timestamp with tz information
|
||||
#created_at = now.isoformat()
|
||||
|
||||
usecase = 1
|
||||
|
||||
if usecase == 1:
|
||||
|
||||
group_id == 'smog_chamber'
|
||||
user_initials = 'NG'
|
||||
#from smog_chamber_file_reader import read_txt_files_as_dict
|
||||
#from g5505_file_reader import copy_file_in_group
|
||||
#select_dir_keywords = ['htof','ams', 'ptr', 'gas','smps']
|
||||
inputfile_dir = '\\\\fs03\\Iron_Sulphate'
|
||||
inputfile_dir = '//fs03/Iron_Sulphate'
|
||||
#select_dir_keywords = ['gas','smps\\20220726','htof\\2022.07.26','ptr\\2022.07.26','ams\\2022.07.26']
|
||||
select_dir_keywords = ['gas','smps/20220726']#,'htof/2022.07.26','ptr/2022.07.26','ams/2022.07.26']
|
||||
select_file_keywords = ['20220726','2022.07.26']
|
||||
|
||||
outputfile_dir = 'output_files'
|
||||
|
||||
output_filename = output_filename_tempate(group_id,created_at,user_initials) #'test_smog_chamber_v14.h5'
|
||||
#output_filename = 'unified_file_smog_chamber_2024-03-19_UTC-OFST_+0100_NG.h5'
|
||||
now = datetime.now()
|
||||
# Populate the now object with time zone information obtained from the local system
|
||||
now_tz_aware = now.astimezone()
|
||||
tz = now_tz_aware.strftime('%z')
|
||||
#created_at = now_tz_aware.strftime('%Y-%m-%d_%H-%M-%S')+'_utcoffset_' + tz
|
||||
created_at = now_tz_aware.strftime('%Y-%m-%d')+'_UTC-OFST_' + tz
|
||||
# Make created at timestamp with tz information
|
||||
#created_at = now.isoformat()
|
||||
return created_at
|
||||
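# --- Illustrative sketch (not part of the committed config): the timestamp format that
# created_at() above is meant to return, and how output_filename_tempate() embeds it.
# Only the standard-library datetime module is assumed.
def _example_timestamp():
    from datetime import datetime
    now_tz_aware = datetime.now().astimezone()   # attach the local time zone to "now"
    return now_tz_aware.strftime('%Y-%m-%d') + '_UTC-OFST_' + now_tz_aware.strftime('%z')

# _example_timestamp()                                          -> e.g. '2024-03-19_UTC-OFST_+0100'
# output_filename_tempate('smog_chamber', _example_timestamp(), 'NG')
#                                    -> 'unified_file_smog_chamber_2024-03-19_UTC-OFST_+0100_NG.h5'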
|
||||
|
||||
ext_to_reader_dict = {'.txt': scf_reader.read_txt_files_as_dict,
|
||||
def select_file_readers(group_id):
|
||||
if group_id == '5505':
|
||||
ext_to_reader_dict = {'.ibw': g5505f_reader.read_xps_ibw_file_as_dict,
|
||||
'.txt': g5505f_reader.read_txt_files_as_dict,
|
||||
'.dat': g5505f_reader.read_txt_files_as_dict,
|
||||
'.h5': g5505f_reader.copy_file_in_group}
|
||||
elif group_id == 'smog_chamber':
|
||||
ext_to_reader_dict = {'.txt': scf_reader.read_txt_files_as_dict,
|
||||
'.TXT': scf_reader.read_txt_files_as_dict,
|
||||
'.h5': g5505f_reader.copy_file_in_group}
|
||||
|
||||
elif usecase == 2 :
|
||||
|
||||
group_id == '5505'
|
||||
user_initials = 'TBR'
|
||||
outputfile_dir = 'output_files'
|
||||
#output_filename = 'test_sls_data_v8.h5'
|
||||
inputfile_dir = '//fs101/5505/People/Juan/TypicalBeamTime'
|
||||
select_file_keywords=[]
|
||||
select_dir_keywords = ['NEXAFS', 'Notes', 'Photos', 'Pressure', 'RGA', 'SES']
|
||||
|
||||
output_filename = output_filename_tempate(group_id,created_at,user_initials)
|
||||
#output_filename = 'unified_file_5505_2024-03-19_UTC-OFST_+0100_TBR.h5'
|
||||
|
||||
ext_to_reader_dict = {'.ibw': g5505f_reader.read_xps_ibw_file_as_dict,
|
||||
'.txt': g5505f_reader.read_txt_files_as_dict,
|
||||
'.dat': g5505f_reader.read_txt_files_as_dict,
|
||||
'.h5': g5505f_reader.copy_file_in_group}
|
||||
elif usecase == 3:
|
||||
user_initials = 'LL'
|
||||
outputfile_dir = 'output_files'
|
||||
output_filename = output_filename_tempate(group_id,created_at,user_initials)
|
||||
|
||||
inputfile_dir = '//fs101/5505/Data'
|
||||
|
||||
#select_dir_keywords = ['Lopap', 'Humidity_Sensors', 'ICAD/HONO', 'ICAD/NO2', 'T200_NOX', 'T360U_CO2']
|
||||
# TODO: make sure in the code composite keywords are broken down into single keywords
|
||||
|
||||
##select_dir_keywords = ['Humidity_Sensors','ICAD/HONO','ICAD/NO2']
|
||||
select_dir_keywords = ['Humidity_Sensors/2022','ICAD/HONO/2022','ICAD/NO2/2022', '2022/01_Jan', '2022/02_Feb', '2022/03_März']
|
||||
|
||||
dates = pd.read_excel(os.path.abspath(os.path.join('input_files','date_experiments_for Juan.xlsx')))
|
||||
|
||||
select_file_keywords=[item.strftime('%Y-%m-%d') for item in dates.loc[0:2,'experiment_date']]
|
||||
select_file_keywords= select_file_keywords + [item.strftime('%Y%m%d') for item in dates.loc[0:2,'experiment_date']]
|
||||
|
||||
ext_to_reader_dict = {'.ibw': g5505f_reader.read_xps_ibw_file_as_dict,
|
||||
'.txt': g5505f_reader.read_txt_files_as_dict,
|
||||
'.dat': g5505f_reader.read_txt_files_as_dict,
|
||||
'.h5': g5505f_reader.copy_file_in_group}
|
||||
|
||||
return ext_to_reader_dict
|
||||
|
||||
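# --- Illustrative sketch (not part of the committed config): how the extension-to-reader map
# returned by select_file_readers() is consumed. The '.h5' entry (copy_file_in_group) takes
# different arguments and is dispatched separately in hdf5_lib; the example path below is one
# of the smog-chamber files that appears later in this commit.
def _example_dispatch(group_id, file_path):
    import os
    ext_to_reader = select_file_readers(group_id)      # e.g. {'.txt': ..., '.TXT': ..., '.h5': ...}
    _, file_ext = os.path.splitext(file_path)
    reader = ext_to_reader.get(file_ext)
    return reader(file_path) if reader else {}         # text readers return a dict ({} if unsupported)

# _example_dispatch('smog_chamber', 'gas/20220726_000004_MSC_gases.txt')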
|
@@ -93,10 +93,12 @@ def read_txt_files_as_dict(filename : str ):
|
||||
file_encoding = 'latin-1'
|
||||
elif 'ICAD' in filename and 'HONO' in filename:
|
||||
table_header = 'Start Date/Time (UTC) Duration (s) NO2 (ppb) NO2 Uncertainty (ppb) H2O (ppb) H2O Uncertainty (ppb) CHOCHO (ppb) CHOCHO Uncertainty (ppb) File Number Light Intensity #ICEDOAS iter. Cell Pressure Ambient Pressure Cell Temp Spec Temp Lat Lon Height Speed GPSQuality 0-Air Ref. Time 0-Air Ref. Duration 0-Air Ref. File Number 0-Air Ref. Intensity 0-Air Ref. Rel Intensity 0-Air Ref. Intensity valid MeasMode SampleSource'
|
||||
separator = '\t'
|
||||
separator = '\t'
|
||||
file_encoding = 'latin-1'
|
||||
elif 'ICAD' in filename and 'NO2' in filename:
|
||||
table_header = 'Start Date/Time (UTC) Duration (s) NO2 (ppb) NO2 Uncertainty (ppb) H2O (ppb) H2O Uncertainty (ppb) CHOCHO (ppb) CHOCHO Uncertainty (ppb) File Number Light Intensity #ICEDOAS iter. Cell Pressure Ambient Pressure Cell Temp Spec Temp Lat Lon Height Speed GPSQuality 0-Air Ref. Time 0-Air Ref. Duration 0-Air Ref. File Number 0-Air Ref. Intensity 0-Air Ref. Rel Intensity 0-Air Ref. Intensity valid MeasMode SampleSource'
|
||||
separator = '\t'
|
||||
file_encoding = 'latin-1'
|
||||
else:
|
||||
return {}
|
||||
#raise ValueError('instrument_folder must be set to either "RGA" or "Pressure"')
|
||||
@@ -107,33 +109,32 @@ def read_txt_files_as_dict(filename : str ):
|
||||
# Work with copy of the file for safety
|
||||
tmp_filename = utils.make_file_copy(source_file_path=filename)
|
||||
|
||||
with open(tmp_filename,'r',encoding=file_encoding,errors='ignore') as f:
|
||||
#file_encoding = f.encoding
|
||||
#table_preamble = ""
|
||||
#with open(tmp_filename,'rb',encoding=file_encoding,errors='ignore') as f:
|
||||
with open(tmp_filename,'rb') as f:
|
||||
table_preamble = []
|
||||
for line_number, line in enumerate(f):
|
||||
|
||||
if table_header in line:
|
||||
list_of_substrings = line.split(separator)
|
||||
if table_header in line.decode(file_encoding):
|
||||
list_of_substrings = line.decode(file_encoding).split(separator)
|
||||
data_start = True
|
||||
column_names = []
|
||||
for i, name in enumerate(list_of_substrings):
|
||||
column_names.append(str(i)+'_'+name)
|
||||
|
||||
print(line_number, len(column_names ))
|
||||
#print(line_number, len(column_names ),'\n')
|
||||
break
|
||||
# Subdivide line into words, and join them by single space.
|
||||
# I assume this produces a cleaner line that contains no stray separator characters (\t, \r) or extra spaces.
|
||||
list_of_substrings = line.split()
|
||||
list_of_substrings = line.decode(file_encoding).split()
|
||||
# TODO: ideally we should use a multiline string, but the yaml parser does not recognize \n as a special character
|
||||
#line = ' '.join(list_of_substrings+['\n'])
|
||||
line = ' '.join(list_of_substrings)
|
||||
table_preamble.append(line)# += new_line
|
||||
#line = ' '.join(list_of_substrings)
|
||||
table_preamble.append(' '.join(list_of_substrings))# += new_line
|
||||
|
||||
header_dict["table_preamble"] = table_preamble
|
||||
|
||||
|
||||
# TODO: it does not work with separater as none :(. fix for RGA
|
||||
# TODO: it does not work with separator as none :(. fix for RGA
|
||||
try:
|
||||
df = pd.read_csv(tmp_filename,
|
||||
delimiter = separator,
|
||||
|
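# --- Illustrative sketch (not part of the committed reader): the header-detection step above in
# isolation. Each raw line is decoded with the instrument file's encoding (e.g. 'latin-1') before
# being compared with table_header and normalized to single spaces; all names here are local.
def _example_scan_for_header(tmp_filename, table_header, separator, file_encoding='latin-1'):
    table_preamble = []
    with open(tmp_filename, 'rb') as f:
        for line_number, raw_line in enumerate(f):
            text = raw_line.decode(file_encoding)
            if table_header in text:
                column_names = [str(i) + '_' + name for i, name in enumerate(text.split(separator))]
                return line_number, column_names, table_preamble
            table_preamble.append(' '.join(text.split()))   # drop \t, \r and repeated spaces
    return None, [], table_preamble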
src/hdf5_lib.py: 193 changes
@@ -17,6 +17,14 @@ import h5py
|
||||
import yaml
|
||||
|
||||
|
||||
def progressBar(count_value, total, suffix=''):
|
||||
bar_length = 100
|
||||
filled_up_Length = int(round(bar_length* count_value / float(total)))
|
||||
percentage = round(100.0 * count_value/float(total),1)
|
||||
bar = '=' * filled_up_Length + '-' * (bar_length - filled_up_Length)
|
||||
sys.stdout.write('[%s] %s%s ...%s\r' %(bar, percentage, '%', suffix))
|
||||
sys.stdout.flush()
|
||||
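# --- Illustrative usage sketch (not part of the committed code) for progressBar() above; the
# suffix text mirrors the notebook output later in this commit and the timing is arbitrary.
def _example_progress():
    import time
    n_files = 10
    for k in range(1, n_files + 1):
        progressBar(k, n_files, suffix='Uploading files in /smps/20220726')
        time.sleep(0.1)                 # stand-in for per-file work
    sys.stdout.write('\n')              # move past the \r-terminated progress line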
|
||||
def read_mtable_as_dataframe(filename):
|
||||
|
||||
""" Reconstruct a Matlab Table encoded in a .h5 file as a Pandas DataFrame. The input .h5 file
|
||||
@@ -150,10 +158,11 @@ def get_parent_child_relationships(file: h5py.File):
|
||||
parent = ['']
|
||||
#values = [file.attrs['count']]
|
||||
# TODO: maybe we should make this more general and not dependent on file_list attribute?
|
||||
if 'file_list' in file.attrs.keys():
|
||||
values = [len(file.attrs['file_list'])]
|
||||
else:
|
||||
values = [1]
|
||||
#if 'file_list' in file.attrs.keys():
|
||||
# values = [len(file.attrs['file_list'])]
|
||||
#else:
|
||||
# values = [1]
|
||||
values = [len(file.keys())]
|
||||
|
||||
def node_visitor(name,obj):
|
||||
#if isinstance(obj,h5py.Group):
|
||||
@@ -161,10 +170,12 @@ def get_parent_child_relationships(file: h5py.File):
|
||||
parent.append(obj.parent.name)
|
||||
#nodes.append(os.path.split(obj.name)[1])
|
||||
#parent.append(os.path.split(obj.parent.name)[1])
|
||||
if isinstance(obj,h5py.Dataset) or not 'file_list' in obj.attrs.keys():
|
||||
|
||||
if isinstance(obj,h5py.Dataset):# or not 'file_list' in obj.attrs.keys():
|
||||
values.append(1)
|
||||
else:
|
||||
values.append(len(obj.attrs['file_list']))
|
||||
values.append(len(obj.keys()))
|
||||
#values.append(len(obj.attrs['file_list']))
|
||||
file.visititems(node_visitor)
|
||||
|
||||
return nodes, parent, values
|
||||
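# --- Illustrative sketch (not part of the committed code): the (nodes, parent, values) triple
# returned above has exactly the shape a plotly treemap expects, which is how
# hdf5_vis.display_group_hierarchy_on_a_treemap() uses it elsewhere in this commit.
def _example_treemap(h5_file_path):
    import plotly.graph_objects as go    # used only by this sketch
    with h5py.File(h5_file_path, 'r') as file:
        nodes, parent, values = get_parent_child_relationships(file)
    fig = go.Figure(go.Treemap(labels=nodes, parents=parent, values=values,
                               branchvalues='remainder'))
    fig.show()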
@@ -204,15 +215,20 @@ def annotate_root_dir(filename,annotation_dict: dict):
|
||||
|
||||
import shutil
|
||||
|
||||
def create_hdf5_file_from_filesystem_path(ofilename : str,
|
||||
def create_hdf5_file_from_filesystem_path(config_param : dict ,
|
||||
input_file_system_path : str,
|
||||
select_dir_keywords = [],
|
||||
select_file_keywords =[],
|
||||
top_sub_dir_mask : bool = True):
|
||||
#def create_hdf5_file_from_filesystem_path(output_filename : str,
|
||||
# input_file_system_path : str,
|
||||
# select_dir_keywords = [],
|
||||
# select_file_keywords =[],
|
||||
# top_sub_dir_mask : bool = True):
|
||||
|
||||
"""
|
||||
Creates an .h5 file with name ofilename that preserves the directory tree (or folder structure) of given a filesystem path and
|
||||
a few file and directory keywords. The keywords enable filtering of directories and files that do not contain the specified keywords.
|
||||
Creates an .h5 file with name "output_filename" that preserves the directory tree (or folder structure) of given a filesystem path.
|
||||
When the file and directory keywords are non-empty, they filter out directories and files that do not contain the specified keywords.
|
||||
|
||||
In the .h5 file, only files with admissible file formats are stored, in the form of datasets and attributes.
|
||||
|
||||
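# --- Illustrative call sketch (not part of the committed code), mirroring the notebook cell
# included later in this commit; paths and keywords are the smog-chamber example values.
#
#   config_param = {'group_id': 'smog_chamber', 'user_initials': 'NG', 'output_dir': 'output_files/'}
#   output_filename_path, output_yml_filename_path = create_hdf5_file_from_filesystem_path(
#       config_param,
#       '//fs03/Iron_Sulphate',                          # forward slashes are required here
#       select_dir_keywords=['gas', 'smps/20220726'],
#       select_file_keywords=['20220726', '2022.07.26'])
#   hdf5_vis.display_group_hierarchy_on_a_treemap(output_filename_path)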
@@ -237,35 +253,74 @@ def create_hdf5_file_from_filesystem_path(ofilename : str,
|
||||
|
||||
# Ensure OS compliant paths and keywords
|
||||
|
||||
# TODO: validate config_param dict, make sure output_filename is a valid file_path
|
||||
group_id = config_param['group_id']
|
||||
user_initials = config_param['user_initials']
|
||||
created_at = config_file.created_at()
|
||||
output_dir = config_param['output_dir']
|
||||
output_filename = output_dir + config_file.output_filename_tempate(group_id,created_at,user_initials)
|
||||
|
||||
admissible_file_ext_list = list(config_file.select_file_readers(group_id).keys())
|
||||
|
||||
if '/' in input_file_system_path:
|
||||
input_file_system_path = input_file_system_path.replace('/',os.sep)
|
||||
else:
|
||||
raise ValueError('input_file_system_path needs to be specified using forward slashes "/".' )
|
||||
|
||||
for i, keyword in enumerate(select_dir_keywords):
|
||||
select_dir_keywords[i] = keyword.replace('/',os.sep)
|
||||
select_dir_keywords[i] = keyword.replace('/',os.sep)
|
||||
|
||||
# Visit each subdirectory from top to bottom, from the root directory defined by input_file_system_path to the lower
|
||||
# level directories.
|
||||
|
||||
with h5py.File(ofilename, 'w') as h5file:
|
||||
# Constrain walkable paths on the specified directory tree by allowing walks that start from root
|
||||
# through subdirectories specified by dir_keywords. This improves efficiency, especially in deep
|
||||
# directory trees with many leaves.
|
||||
paths = []
|
||||
if top_sub_dir_mask:
|
||||
for item in os.listdir(input_file_system_path):
|
||||
if any([item in keyword for keyword in select_dir_keywords]):
|
||||
paths.append(os.path.join(input_file_system_path,item))
|
||||
else:
|
||||
paths.append(input_file_system_path)
|
||||
|
||||
# Visit each subdirectory from top to bottom, from the root directory defined by input_file_system_path to the lower
|
||||
# level directories.
|
||||
|
||||
# Constrain walkable paths on the specified directory tree by allowing walks that start from root
|
||||
# through subdirectories specified by dir_keywords. This improves efficiency, especially in deep
|
||||
# directory trees with many leaves.
|
||||
paths = []
|
||||
if top_sub_dir_mask:
|
||||
for item in os.listdir(input_file_system_path):
|
||||
if any([item in keyword for keyword in select_dir_keywords]):
|
||||
paths.append(os.path.join(input_file_system_path,item))
|
||||
else:
|
||||
paths.append(input_file_system_path)
|
||||
with h5py.File(output_filename, 'w') as h5file:
|
||||
|
||||
for item in paths:
|
||||
|
||||
root_dir = input_file_system_path
|
||||
|
||||
# Create dictionary with directory-files pairs where files satisfy keyword and admisible type contraints
|
||||
# It requires an extra pass over directory three and additional memory for dictionary, but it may be useful
|
||||
# to speed up subsequent step and prune resulting directory tree.
|
||||
|
||||
# For each directory and/or subdirectory, keep files that satisfy file_keyword constraints, and store
|
||||
# (directory_path, suitable files) relationships in a dictionary.
|
||||
file_paths_dict = {}
|
||||
|
||||
check_file_ext = lambda filename: any([ext in filename for ext in admissible_file_ext_list])
|
||||
|
||||
for dirpath, _, filenames in os.walk(item,topdown=False):
|
||||
file_paths_dict[dirpath] = []
|
||||
|
||||
# Check files that have an admissible extension and store them in admissible_filenames list
|
||||
admissible_filenames = []
|
||||
for fn in filenames:
|
||||
if check_file_ext(fn):
|
||||
admissible_filenames.append(fn)
|
||||
|
||||
if select_file_keywords: # when select_file_keywords = [], all files are considered
|
||||
for filename in admissible_filenames:
|
||||
# Do not consider files with types for which there is still no file_reader. TODO: extend file_reader library.
|
||||
#if not any([ext in filename for ext in admissible_file_ext_list]):
|
||||
# continue
|
||||
|
||||
# Add files whose name contains any of the file_keywords
|
||||
if any([keyword in filename for keyword in select_file_keywords]):
|
||||
file_paths_dict[dirpath].append(filename)
|
||||
else:
|
||||
file_paths_dict[dirpath] = admissible_filenames
|
||||
|
||||
for node_number, node in enumerate(os.walk(item, topdown=True)):
|
||||
|
||||
dirpath, dirnames, filenames_list = node
|
||||
@@ -277,26 +332,24 @@ def create_hdf5_file_from_filesystem_path(ofilename : str,
|
||||
# When select_file_keywords is empty, i.e., [], do not apply any filter on the filenames.
|
||||
|
||||
|
||||
filtered_filename_list = []
|
||||
if select_file_keywords:
|
||||
for filename in filenames_list:
|
||||
if any([keyword in filename for keyword in select_file_keywords]):
|
||||
filtered_filename_list.append(filename)
|
||||
else:
|
||||
filtered_filename_list = filenames_list.copy()
|
||||
#filtered_filename_list = []
|
||||
#if select_file_keywords:
|
||||
# for filename in filenames_list:
|
||||
# if any([keyword in filename for keyword in select_file_keywords]):
|
||||
# filtered_filename_list.append(filename)
|
||||
#else:
|
||||
# filtered_filename_list = filenames_list.copy()
|
||||
|
||||
admissible_file_ext_list = list(config_file.ext_to_reader_dict.keys())
|
||||
|
||||
for filename in filtered_filename_list.copy():
|
||||
if not any([ext in filename for ext in admissible_file_ext_list]):
|
||||
filtered_filename_list.remove(filename)
|
||||
filtered_filename_list = file_paths_dict.get(dirpath,filenames_list.copy())
|
||||
|
||||
|
||||
# Skip subdirectories that do not contain a keyword in the parameter 'select_dir_keywords' when it is nonempty
|
||||
if select_dir_keywords:
|
||||
#if (dirpath.count(os.sep) > offset) and not any([item in dirpath for item in select_dir_keywords]):
|
||||
#tail, dirname = os.path.split(dirpath)
|
||||
#if not any([item in dirname for item in select_dir_keywords]):
|
||||
if not any([item in dirpath for item in select_dir_keywords]):
|
||||
continue
|
||||
continue
|
||||
|
||||
group_name = dirpath.replace(os.sep,'/')
|
||||
group_name = group_name.replace(root_dir.replace(os.sep,'/') + '/', '/')
|
||||
@@ -308,46 +361,62 @@ def create_hdf5_file_from_filesystem_path(ofilename : str,
|
||||
|
||||
# TODO: for each "admissible" file in filenames, create an associated dataset in the corresponding group (subdirectory)
|
||||
|
||||
for filename in filtered_filename_list:
|
||||
for filenumber, filename in enumerate(filtered_filename_list):
|
||||
|
||||
# Get file extension (or file type)
|
||||
file_name, file_ext = os.path.splitext(filename)
|
||||
|
||||
#print(filename)
|
||||
|
||||
#try:
|
||||
if not 'h5' in filename:
|
||||
file_dict = config_file.ext_to_reader_dict[file_ext](os.path.join(dirpath,filename))
|
||||
file_dict = config_file.select_file_readers(group_id)[file_ext](os.path.join(dirpath,filename))
|
||||
|
||||
if not file_dict:
|
||||
continue
|
||||
|
||||
# Create group and add their attributes
|
||||
h5file[group_name].create_group(name=file_dict['name'])
|
||||
for key in file_dict['attributes_dict'].keys():
|
||||
|
||||
# Represent string values as fixed length strings in the HDF5 file, which need
|
||||
# to be decoded as string when we read them. It provides better control than variable strings,
|
||||
# at the expense of flexibility.
|
||||
# https://docs.h5py.org/en/stable/strings.html
|
||||
value = file_dict['attributes_dict'][key]
|
||||
if isinstance(value,str):
|
||||
utf8_type = h5py.string_dtype('utf-8', len(value))
|
||||
value = np.array(value.encode('utf-8'),dtype=utf8_type)
|
||||
try:
|
||||
# Create group and add their attributes
|
||||
h5file[group_name].create_group(name=file_dict['name'])
|
||||
for key in file_dict['attributes_dict'].keys():
|
||||
|
||||
# Represent string values as fixed length strings in the HDF5 file, which need
|
||||
# to be decoded as string when we read them. It provides better control than variable strings,
|
||||
# at the expense of flexibility.
|
||||
# https://docs.h5py.org/en/stable/strings.html
|
||||
value = file_dict['attributes_dict'][key]
|
||||
if isinstance(value,str):
|
||||
utf8_type = h5py.string_dtype('utf-8', len(value))
|
||||
value = np.array(value.encode('utf-8'),dtype=utf8_type)
|
||||
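# --- Illustrative sketch (not part of the committed code) of the fixed-length string round-trip
# used above; 'grp' and the attribute name 'campaign' are hypothetical stand-ins.
#
#   value = 'Iron_Sulphate'
#   utf8_type = h5py.string_dtype('utf-8', len(value))
#   grp.attrs.create(name='campaign', data=np.array(value.encode('utf-8'), dtype=utf8_type))
#   grp.attrs['campaign'].decode('utf-8')    # fixed-length strings read back as bytes -> decode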
|
||||
h5file[group_name][file_dict['name']].attrs.create(name=key,
|
||||
data=value)
|
||||
|
||||
# Add datasets to just created group
|
||||
for dataset in file_dict['datasets']:
|
||||
h5file[group_name][file_dict['name']].create_dataset(name = dataset['name'],
|
||||
data = dataset['data'],
|
||||
#dtype = file_dict['dtype'],
|
||||
shape = dataset['shape'])
|
||||
h5file[group_name][file_dict['name']].attrs.create(name=key,
|
||||
data=value)
|
||||
|
||||
# Add datasets to just created group
|
||||
for dataset in file_dict['datasets']:
|
||||
h5file[group_name][file_dict['name']].create_dataset(name = dataset['name'],
|
||||
data = dataset['data'],
|
||||
#dtype = file_dict['dtype'],
|
||||
shape = dataset['shape'])
|
||||
|
||||
except Exception as inst:
|
||||
# TODO: log when a file could not be stored as a dataset
|
||||
print(inst)
|
||||
|
||||
else:
|
||||
config_file.ext_to_reader_dict[file_ext](source_file_path = os.path.join(dirpath,filename),
|
||||
config_file.select_file_readers(group_id)[file_ext](source_file_path = os.path.join(dirpath,filename),
|
||||
dest_file_obj = h5file,
|
||||
dest_group_name = group_name +'/'+filename)
|
||||
print(file_ext, ':)')
|
||||
#print(filename,file_ext, ':)')
|
||||
|
||||
|
||||
progressBar(filenumber,len(filtered_filename_list), 'Uploading files in ' + dirpath)
|
||||
|
||||
|
||||
|
||||
output_yml_filename_path = hdf5_vis.take_yml_snapshot_of_hdf5_file(output_filename)
|
||||
|
||||
return output_filename, output_yml_filename_path
|
||||
|
||||
|
||||
|
||||
|
@@ -140,7 +140,7 @@ def print_metadata(name, obj, folder_depth, yaml_dict):
|
||||
#group_dict[obj.name]["name"] = obj.name
|
||||
#group_dict[obj.name]["attributes"] = attr_dict
|
||||
#group_dict[obj.name]["datasets"] = {}
|
||||
print(name)
|
||||
#print(name)
|
||||
|
||||
yaml_dict[obj.name] = group_dict
|
||||
elif isinstance(obj, h5py.Dataset):
|
||||
|
@@ -35,16 +35,26 @@ def get_review_status(filename_path):
|
||||
workflow_steps.append(line)
|
||||
return workflow_steps[-1]
|
||||
|
||||
def checkout_review_branch(repo_obj,branch_name):
|
||||
def checkout_review_branch(branch_name):
|
||||
# Create a new branch
|
||||
#branch_name = 'metadata-review-by-'+initials
|
||||
head_commit = repo_obj.head.peel()# Get the commit hash associated with HEAD
|
||||
|
||||
if not branch_name in repo_obj.branches:
|
||||
branch = repo_obj.create_branch(branch_name, head_commit)
|
||||
else:
|
||||
branch = repo_obj.branches[branch_name]
|
||||
repo_obj.checkout(branch)
|
||||
checkout_branch_command = lambda branch_name : ['git','checkout', branch_name]
|
||||
output = subprocess.run(checkout_branch_command(branch_name), capture_output=True,text=True,check=True)
|
||||
|
||||
print(output.stdout)
|
||||
|
||||
#if not branch_name in repo_obj.branches:
|
||||
# branch = repo_obj.create_branch(branch_name, head_commit)
|
||||
#else:
|
||||
# branch = repo_obj.branches[branch_name]
|
||||
#repo_obj.checkout(branch)
|
||||
|
||||
current_branch_command = ['git','branch','--show-current']
|
||||
status_command = ['git','status']
|
||||
add_command = lambda add_list: ['git','add'] + add_list
|
||||
commit_command = lambda message: ['git','commit','-m', message]
|
||||
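# --- Illustrative sketch (not part of the committed code): how the command helpers above are
# combined further down in this module (stage the review files, then commit them); subprocess
# is already used throughout this module.
def _example_stage_and_commit(paths_to_add, message='Initialized metadata review.'):
    subprocess.run(add_command(paths_to_add), capture_output=True, check=True)
    commit_output = subprocess.run(commit_command(message), capture_output=True, text=True, check=True)
    return commit_output.stdout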
|
||||
def first_initialize_metadata_review(hdf5_file_path, reviewer_attrs):
|
||||
|
||||
@@ -69,35 +79,55 @@ def first_initialize_metadata_review(hdf5_file_path, reviewer_attrs):
|
||||
raise ValueError("metadata review cannot be initialized. The associated .yaml file under review was not found. Run take_yml_snapshot_of_hdf5_file(filename_path) ")
|
||||
|
||||
# Initialize metadata review workflow
|
||||
print("Create branch metadata-review-by-"+initials+"\n")
|
||||
# print("Create branch metadata-review-by-"+initials+"\n")
|
||||
|
||||
#checkout_review_branch(branch_name)
|
||||
|
||||
# Check you are working at the right branch
|
||||
current_branch_command = ['git','branch','--show-current']
|
||||
curr_branch = subprocess.run(current_branch_command,capture_output=True,text=True,check=True)
|
||||
if not branch_name in curr_branch.stdout:
|
||||
raise ValueError("Please checkout the branch: "+branch_name+" via Git Bash Terminal while in the project's directory")
|
||||
|
||||
# Check if review file already exists and then check if it is still untracked
|
||||
review_yaml_file_path = os.path.join("review/",filename+YAML_EXT)
|
||||
|
||||
if not os.path.exists(review_yaml_file_path):
|
||||
review_yaml_file_path = utils.make_file_copy(os.path.join(hdf5_file_path_tail,filename+YAML_EXT), 'review')
|
||||
#else:
|
||||
# raise Warning("the file " + os.path.join("review/",filename+YAML_EXT)+ " already exists. Delete this file to reinitialize the metadata review process.")
|
||||
|
||||
|
||||
review_yaml_file_path_tail, ext = os.path.splitext(review_yaml_file_path)
|
||||
|
||||
with open(os.path.join(review_yaml_file_path_tail+"-review_status"+".txt"),'w') as f:
|
||||
f.write('under review')
|
||||
|
||||
checkout_review_branch(repo_obj, branch_name)
|
||||
# Stage review files and commit them to local repository
|
||||
status = subprocess.run(status_command,capture_output=True,text=True,check=True)
|
||||
untracked_files_for_review = []
|
||||
for line in status.stdout.splitlines():
|
||||
if 'review/' in line:  # status was run with text=True, so each line is already a str
|
||||
untracked_files_for_review.append(line)
|
||||
|
||||
status_dict = repo_obj.status()
|
||||
for filepath, file_status in status_dict.items():
|
||||
result = subprocess.run(add_command(untracked_files_for_review),capture_output=True,check=True)
|
||||
message = 'Initialized metadata review.'
|
||||
commit_output = subprocess.run(commit_command(message),capture_output=True,check=True)
|
||||
|
||||
print(commit_output.stdout)
|
||||
|
||||
|
||||
|
||||
|
||||
#status_dict = repo_obj.status()
|
||||
#for filepath, file_status in status_dict.items():
|
||||
# Identify keys associated to review files and stage them
|
||||
if 'review/'+filename in filepath:
|
||||
# if 'review/'+filename in filepath:
|
||||
# Stage changes
|
||||
repo_obj.index.add(filepath)
|
||||
# repo_obj.index.add(filepath)
|
||||
|
||||
author = config_file.author #default_signature
|
||||
committer = config_file.committer
|
||||
message = "Initialized metadata review process."
|
||||
tree = repo_obj.index.write_tree()
|
||||
oid = repo_obj.create_commit('HEAD', author, committer, message, tree, [repo_obj.head.peel().oid])
|
||||
#author = config_file.author #default_signature
|
||||
#committer = config_file.committer
|
||||
#message = "Initialized metadata review process."
|
||||
#tree = repo_obj.index.write_tree()
|
||||
#oid = repo_obj.create_commit('HEAD', author, committer, message, tree, [repo_obj.head.peel().oid])
|
||||
|
||||
#print("Add and commit"+"\n")
|
||||
|
||||
@@ -119,41 +149,33 @@ def second_submit_metadata_review(review_yaml_file_path, reviewer_attrs):
|
||||
initials = reviewer_attrs['initials']
|
||||
branch_name = '-'.join([reviewer_attrs['type'],'review','by',initials])
|
||||
# TODO: replace with subprocess + git
|
||||
checkout_review_branch(repo_obj, branch_name)
|
||||
#checkout_review_branch(repo_obj, branch_name)
|
||||
|
||||
# Check you are working at the right branch
|
||||
curr_branch = subprocess.run(current_branch_command,capture_output=True,text=True,check=True)
|
||||
if not branch_name in curr_branch.stdout:
|
||||
raise ValueError('Please checkout ' + branch_name + ' via Git Bash before submitting metadata review files. ')
|
||||
|
||||
#if any([status in get_review_status(filename_path) for status in ['under review','submitted']]):
|
||||
# filename_path_tail, filename_path_head = os.path.split(filename_path)
|
||||
# filename, ext = os.path.splitext(filename_path_head)
|
||||
# # TODO:
|
||||
|
||||
|
||||
##
|
||||
status_command = ['git','status']
|
||||
add_command = lambda add_list: ['git','add'] + add_list
|
||||
commit_command = lambda message: ['git','commit','-m', message]
|
||||
#push_command = lambda repository,refspec: ['git','push',repository,refspec]
|
||||
|
||||
# Collect modified review files
|
||||
status = subprocess.run(status_command,capture_output=True,check=True)
|
||||
|
||||
files_to_add_list = []
|
||||
modified_files = []
|
||||
for line in status.stdout.splitlines():
|
||||
# convert line from bytes to str
|
||||
tmp = line.decode("utf-8")
|
||||
if 'modified' in tmp and review_yaml_file_path in tmp:
|
||||
files_to_add_list.append(tmp.split()[1])
|
||||
##
|
||||
modified_files.append(tmp.split()[1])
|
||||
|
||||
# Stage modified files and commit them to local repository
|
||||
review_yaml_file_path_tail, review_yaml_file_path_head = os.path.split(review_yaml_file_path)
|
||||
filename, ext = os.path.splitext(review_yaml_file_path_head)
|
||||
if files_to_add_list:
|
||||
if modified_files:
|
||||
review_status_file_path = os.path.join("review/",filename+"-review_status"+TXT_EXT)
|
||||
with open(review_status_file_path,'a') as f:
|
||||
f.write('\nsubmitted')
|
||||
|
||||
files_to_add_list.append(review_status_file_path)
|
||||
modified_files.append(review_status_file_path)
|
||||
|
||||
result = subprocess.run(add_command(files_to_add_list),capture_output=True,check=True)
|
||||
result = subprocess.run(add_command(modified_files),capture_output=True,check=True)
|
||||
message = 'Submitted metadata review.'
|
||||
commit_output = subprocess.run(commit_command(message),capture_output=True,check=True)
|
||||
|
||||
@@ -163,20 +185,6 @@ def second_submit_metadata_review(review_yaml_file_path, reviewer_attrs):
|
||||
print('Nothing to commit.')
|
||||
|
||||
|
||||
#status_dict = repo_obj.status()
|
||||
#for filepath, file_status in status_dict.items():
|
||||
# Identify keys associated to review files and stage them
|
||||
# if ('review/'+filename in filepath) and (file_status == pygit.GIT_STATUS_WT_MODIFIED):
|
||||
# Stage changes
|
||||
# repo_obj.index.add(filepath)
|
||||
|
||||
#author = config_file.author #default_signature
|
||||
#committer = config_file.committer
|
||||
#message = "Submitted metadata review."
|
||||
#tree = repo_obj.index.write_tree()
|
||||
#oid = repo_obj.create_commit('HEAD', author, committer, message, tree, [repo_obj.head.peel().oid])
|
||||
|
||||
|
||||
|
||||
def third_complete_metadata_review(reviewer_attrs):
|
||||
|
||||
|
@@ -1,41 +1,55 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Import python packages and modules"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import sys\n",
|
||||
"import os\n",
|
||||
"# Set up project root directory\n",
|
||||
"root_dir = os.path.abspath(os.curdir)\n",
|
||||
"sys.path.append(root_dir)\n",
|
||||
"\n",
|
||||
"import src.hdf5_vis as hdf5_vis\n",
|
||||
"import src.hdf5_lib as hdf5_lib\n",
|
||||
"import input_files.config_file as config_file\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"output_dir = 'output_files/'\n",
|
||||
"group_id = '5505'#'smog_chamber'#'5505'\n",
|
||||
"user_initials = 'LL' #'NG' #'LL' # 'TBR'\n",
|
||||
"\n",
|
||||
"group_id = 'smog_chamber'#'5505'\n",
|
||||
"user_initials = 'NG'#'LL' #'NG' #'LL' # 'TBR'\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Define input file path and keywords\n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"2 26\n",
|
||||
".dat :)\n",
|
||||
"2 26\n",
|
||||
".dat :)\n",
|
||||
"2 26\n",
|
||||
".dat :)\n",
|
||||
"2 26\n",
|
||||
".dat :)\n",
|
||||
"Humidity_Sensors\n",
|
||||
"Humidity_Sensors/2022\n",
|
||||
"Humidity_Sensors/2022/01_Januar\n",
|
||||
"Humidity_Sensors/2022/02_Februar\n",
|
||||
"Humidity_Sensors/2022/03_März\n",
|
||||
"Humidity_Sensors/2022/04_April\n",
|
||||
"Humidity_Sensors/2022/05_Mai\n",
|
||||
"Humidity_Sensors/2022/06_Juni\n",
|
||||
"Humidity_Sensors/2022/07_Juli\n",
|
||||
"Humidity_Sensors/2022/10_Oktober\n",
|
||||
"Humidity_Sensors/2022/11_November\n",
|
||||
"Humidity_Sensors/2022/12_Dezember\n",
|
||||
"ICAD\n",
|
||||
"ICAD/HONO\n",
|
||||
"ICAD/HONO/2022\n",
|
||||
"ICAD/HONO_prototype\n",
|
||||
"ICAD/HONO_prototype/2022\n",
|
||||
"ICAD/NO2\n",
|
||||
"ICAD/NO2/2022\n"
|
||||
"[==================================================--------------------------------------------------] 50.0% ...Uploading files in \\\\fs03\\Iron_Sulphate\\smps\\20220726\r"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -49,237 +63,113 @@
|
||||
"branchvalues": "remainder",
|
||||
"customdata": [
|
||||
"<br>",
|
||||
"/Humidity_Sensors",
|
||||
"/Humidity_Sensors/2022",
|
||||
"/Humidity_Sensors/2022/01_Januar",
|
||||
"/Humidity_Sensors/2022/01_Januar/2022-01-31_09.37.56_PC14751_Humidity_Sensors.dat",
|
||||
"/Humidity_Sensors/2022/01_Januar/2022-01-31_09.37.56_PC14751_Humidity_Sensors.dat/categorial_variable_names",
|
||||
"/Humidity_Sensors/2022/01_Januar/2022-01-31_09.37.56_PC14751_Humidity_Sensors.dat/categorical_variables",
|
||||
"/Humidity_Sensors/2022/01_Januar/2022-01-31_09.37.56_PC14751_Humidity_Sensors.dat/numerical_variable_names",
|
||||
"/Humidity_Sensors/2022/01_Januar/2022-01-31_09.37.56_PC14751_Humidity_Sensors.dat/numerical_variables",
|
||||
"/Humidity_Sensors/2022/02_Februar",
|
||||
"/Humidity_Sensors/2022/02_Februar/2022-02-11_09.07.50_PC14751_Humidity_Sensors.dat",
|
||||
"/Humidity_Sensors/2022/02_Februar/2022-02-11_09.07.50_PC14751_Humidity_Sensors.dat/categorial_variable_names",
|
||||
"/Humidity_Sensors/2022/02_Februar/2022-02-11_09.07.50_PC14751_Humidity_Sensors.dat/categorical_variables",
|
||||
"/Humidity_Sensors/2022/02_Februar/2022-02-11_09.07.50_PC14751_Humidity_Sensors.dat/numerical_variable_names",
|
||||
"/Humidity_Sensors/2022/02_Februar/2022-02-11_09.07.50_PC14751_Humidity_Sensors.dat/numerical_variables",
|
||||
"/Humidity_Sensors/2022/02_Februar/2022-02-11_16.46.26_PC10228_Humidity_Sensors.dat",
|
||||
"/Humidity_Sensors/2022/02_Februar/2022-02-11_16.46.26_PC10228_Humidity_Sensors.dat/categorial_variable_names",
|
||||
"/Humidity_Sensors/2022/02_Februar/2022-02-11_16.46.26_PC10228_Humidity_Sensors.dat/categorical_variables",
|
||||
"/Humidity_Sensors/2022/02_Februar/2022-02-11_16.46.26_PC10228_Humidity_Sensors.dat/numerical_variable_names",
|
||||
"/Humidity_Sensors/2022/02_Februar/2022-02-11_16.46.26_PC10228_Humidity_Sensors.dat/numerical_variables",
|
||||
"/Humidity_Sensors/2022/03_März",
|
||||
"/Humidity_Sensors/2022/03_März/2022-03-14_09.05.01_PC14751_Humidity_Sensors.dat",
|
||||
"/Humidity_Sensors/2022/03_März/2022-03-14_09.05.01_PC14751_Humidity_Sensors.dat/categorial_variable_names",
|
||||
"/Humidity_Sensors/2022/03_März/2022-03-14_09.05.01_PC14751_Humidity_Sensors.dat/categorical_variables",
|
||||
"/Humidity_Sensors/2022/03_März/2022-03-14_09.05.01_PC14751_Humidity_Sensors.dat/numerical_variable_names",
|
||||
"/Humidity_Sensors/2022/03_März/2022-03-14_09.05.01_PC14751_Humidity_Sensors.dat/numerical_variables",
|
||||
"/Humidity_Sensors/2022/04_April",
|
||||
"/Humidity_Sensors/2022/05_Mai",
|
||||
"/Humidity_Sensors/2022/06_Juni",
|
||||
"/Humidity_Sensors/2022/07_Juli",
|
||||
"/Humidity_Sensors/2022/10_Oktober",
|
||||
"/Humidity_Sensors/2022/11_November",
|
||||
"/Humidity_Sensors/2022/12_Dezember",
|
||||
"/ICAD",
|
||||
"/ICAD/HONO",
|
||||
"/ICAD/HONO/2022",
|
||||
"/ICAD/HONO/2022/10_Oct",
|
||||
"/ICAD/HONO/2022/11_Nov",
|
||||
"/ICAD/HONO/2022/12_Dec",
|
||||
"/ICAD/HONO_prototype",
|
||||
"/ICAD/HONO_prototype/2022",
|
||||
"/ICAD/HONO_prototype/2022/01_Jan",
|
||||
"/ICAD/HONO_prototype/2022/02_Feb",
|
||||
"/ICAD/NO2",
|
||||
"/ICAD/NO2/2022",
|
||||
"/ICAD/NO2/2022/01_Jan",
|
||||
"/ICAD/NO2/2022/02_Feb",
|
||||
"/ICAD/NO2/2022/03_Mar",
|
||||
"/ICAD/NO2/2022/04_Apr",
|
||||
"/ICAD/NO2/2022/05_May",
|
||||
"/ICAD/NO2/2022/06_June",
|
||||
"/ICAD/NO2/2022/07_July",
|
||||
"/ICAD/NO2/2022/10_Oct",
|
||||
"/ICAD/NO2/2022/11_Nov",
|
||||
"/ICAD/NO2/2022/12_Dec"
|
||||
"/gas",
|
||||
"/gas/20220726_000004_MSC_gases.txt",
|
||||
"/gas/20220726_000004_MSC_gases.txt/categorial_variable_names",
|
||||
"/gas/20220726_000004_MSC_gases.txt/categorical_variables",
|
||||
"/gas/20220726_000004_MSC_gases.txt/numerical_variable_names",
|
||||
"/gas/20220726_000004_MSC_gases.txt/numerical_variables",
|
||||
"/gas/20220726_101617_MSC_gases.txt",
|
||||
"/gas/20220726_101617_MSC_gases.txt/categorial_variable_names",
|
||||
"/gas/20220726_101617_MSC_gases.txt/categorical_variables",
|
||||
"/gas/20220726_101617_MSC_gases.txt/numerical_variable_names",
|
||||
"/gas/20220726_101617_MSC_gases.txt/numerical_variables",
|
||||
"/smps",
|
||||
"/smps/20220726",
|
||||
"/smps/20220726/20220726_mass.TXT",
|
||||
"/smps/20220726/20220726_mass.TXT/categorial_variable_names",
|
||||
"/smps/20220726/20220726_mass.TXT/categorical_variables",
|
||||
"/smps/20220726/20220726_mass.TXT/numerical_variable_names",
|
||||
"/smps/20220726/20220726_mass.TXT/numerical_variables",
|
||||
"/smps/20220726/20220726_num.TXT",
|
||||
"/smps/20220726/20220726_num.TXT/categorial_variable_names",
|
||||
"/smps/20220726/20220726_num.TXT/categorical_variables",
|
||||
"/smps/20220726/20220726_num.TXT/numerical_variable_names",
|
||||
"/smps/20220726/20220726_num.TXT/numerical_variables"
|
||||
],
|
||||
"hovertemplate": "<b>%{label} </b> <br> Count: %{value} <br> Path: %{customdata}",
|
||||
"labels": [
|
||||
"/",
|
||||
"/Humidity_Sensors",
|
||||
"/Humidity_Sensors/2022",
|
||||
"/Humidity_Sensors/2022/01_Januar",
|
||||
"/Humidity_Sensors/2022/01_Januar/2022-01-31_09.37.56_PC14751_Humidity_Sensors.dat",
|
||||
"/Humidity_Sensors/2022/01_Januar/2022-01-31_09.37.56_PC14751_Humidity_Sensors.dat/categorial_variable_names",
|
||||
"/Humidity_Sensors/2022/01_Januar/2022-01-31_09.37.56_PC14751_Humidity_Sensors.dat/categorical_variables",
|
||||
"/Humidity_Sensors/2022/01_Januar/2022-01-31_09.37.56_PC14751_Humidity_Sensors.dat/numerical_variable_names",
|
||||
"/Humidity_Sensors/2022/01_Januar/2022-01-31_09.37.56_PC14751_Humidity_Sensors.dat/numerical_variables",
|
||||
"/Humidity_Sensors/2022/02_Februar",
|
||||
"/Humidity_Sensors/2022/02_Februar/2022-02-11_09.07.50_PC14751_Humidity_Sensors.dat",
|
||||
"/Humidity_Sensors/2022/02_Februar/2022-02-11_09.07.50_PC14751_Humidity_Sensors.dat/categorial_variable_names",
|
||||
"/Humidity_Sensors/2022/02_Februar/2022-02-11_09.07.50_PC14751_Humidity_Sensors.dat/categorical_variables",
|
||||
"/Humidity_Sensors/2022/02_Februar/2022-02-11_09.07.50_PC14751_Humidity_Sensors.dat/numerical_variable_names",
|
||||
"/Humidity_Sensors/2022/02_Februar/2022-02-11_09.07.50_PC14751_Humidity_Sensors.dat/numerical_variables",
|
||||
"/Humidity_Sensors/2022/02_Februar/2022-02-11_16.46.26_PC10228_Humidity_Sensors.dat",
|
||||
"/Humidity_Sensors/2022/02_Februar/2022-02-11_16.46.26_PC10228_Humidity_Sensors.dat/categorial_variable_names",
|
||||
"/Humidity_Sensors/2022/02_Februar/2022-02-11_16.46.26_PC10228_Humidity_Sensors.dat/categorical_variables",
|
||||
"/Humidity_Sensors/2022/02_Februar/2022-02-11_16.46.26_PC10228_Humidity_Sensors.dat/numerical_variable_names",
|
||||
"/Humidity_Sensors/2022/02_Februar/2022-02-11_16.46.26_PC10228_Humidity_Sensors.dat/numerical_variables",
|
||||
"/Humidity_Sensors/2022/03_März",
|
||||
"/Humidity_Sensors/2022/03_März/2022-03-14_09.05.01_PC14751_Humidity_Sensors.dat",
|
||||
"/Humidity_Sensors/2022/03_März/2022-03-14_09.05.01_PC14751_Humidity_Sensors.dat/categorial_variable_names",
|
||||
"/Humidity_Sensors/2022/03_März/2022-03-14_09.05.01_PC14751_Humidity_Sensors.dat/categorical_variables",
|
||||
"/Humidity_Sensors/2022/03_März/2022-03-14_09.05.01_PC14751_Humidity_Sensors.dat/numerical_variable_names",
|
||||
"/Humidity_Sensors/2022/03_März/2022-03-14_09.05.01_PC14751_Humidity_Sensors.dat/numerical_variables",
|
||||
"/Humidity_Sensors/2022/04_April",
|
||||
"/Humidity_Sensors/2022/05_Mai",
|
||||
"/Humidity_Sensors/2022/06_Juni",
|
||||
"/Humidity_Sensors/2022/07_Juli",
|
||||
"/Humidity_Sensors/2022/10_Oktober",
|
||||
"/Humidity_Sensors/2022/11_November",
|
||||
"/Humidity_Sensors/2022/12_Dezember",
|
||||
"/ICAD",
|
||||
"/ICAD/HONO",
|
||||
"/ICAD/HONO/2022",
|
||||
"/ICAD/HONO/2022/10_Oct",
|
||||
"/ICAD/HONO/2022/11_Nov",
|
||||
"/ICAD/HONO/2022/12_Dec",
|
||||
"/ICAD/HONO_prototype",
|
||||
"/ICAD/HONO_prototype/2022",
|
||||
"/ICAD/HONO_prototype/2022/01_Jan",
|
||||
"/ICAD/HONO_prototype/2022/02_Feb",
|
||||
"/ICAD/NO2",
|
||||
"/ICAD/NO2/2022",
|
||||
"/ICAD/NO2/2022/01_Jan",
|
||||
"/ICAD/NO2/2022/02_Feb",
|
||||
"/ICAD/NO2/2022/03_Mar",
|
||||
"/ICAD/NO2/2022/04_Apr",
|
||||
"/ICAD/NO2/2022/05_May",
|
||||
"/ICAD/NO2/2022/06_June",
|
||||
"/ICAD/NO2/2022/07_July",
|
||||
"/ICAD/NO2/2022/10_Oct",
|
||||
"/ICAD/NO2/2022/11_Nov",
|
||||
"/ICAD/NO2/2022/12_Dec"
|
||||
"/gas",
|
||||
"/gas/20220726_000004_MSC_gases.txt",
|
||||
"/gas/20220726_000004_MSC_gases.txt/categorial_variable_names",
|
||||
"/gas/20220726_000004_MSC_gases.txt/categorical_variables",
|
||||
"/gas/20220726_000004_MSC_gases.txt/numerical_variable_names",
|
||||
"/gas/20220726_000004_MSC_gases.txt/numerical_variables",
|
||||
"/gas/20220726_101617_MSC_gases.txt",
|
||||
"/gas/20220726_101617_MSC_gases.txt/categorial_variable_names",
|
||||
"/gas/20220726_101617_MSC_gases.txt/categorical_variables",
|
||||
"/gas/20220726_101617_MSC_gases.txt/numerical_variable_names",
|
||||
"/gas/20220726_101617_MSC_gases.txt/numerical_variables",
|
||||
"/smps",
|
||||
"/smps/20220726",
|
||||
"/smps/20220726/20220726_mass.TXT",
|
||||
"/smps/20220726/20220726_mass.TXT/categorial_variable_names",
|
||||
"/smps/20220726/20220726_mass.TXT/categorical_variables",
|
||||
"/smps/20220726/20220726_mass.TXT/numerical_variable_names",
|
||||
"/smps/20220726/20220726_mass.TXT/numerical_variables",
|
||||
"/smps/20220726/20220726_num.TXT",
|
||||
"/smps/20220726/20220726_num.TXT/categorial_variable_names",
|
||||
"/smps/20220726/20220726_num.TXT/categorical_variables",
|
||||
"/smps/20220726/20220726_num.TXT/numerical_variable_names",
|
||||
"/smps/20220726/20220726_num.TXT/numerical_variables"
|
||||
],
|
||||
"name": "",
|
||||
"parents": [
|
||||
"",
|
||||
"/",
|
||||
"/Humidity_Sensors",
|
||||
"/Humidity_Sensors/2022",
|
||||
"/Humidity_Sensors/2022/01_Januar",
|
||||
"/Humidity_Sensors/2022/01_Januar/2022-01-31_09.37.56_PC14751_Humidity_Sensors.dat",
|
||||
"/Humidity_Sensors/2022/01_Januar/2022-01-31_09.37.56_PC14751_Humidity_Sensors.dat",
|
||||
"/Humidity_Sensors/2022/01_Januar/2022-01-31_09.37.56_PC14751_Humidity_Sensors.dat",
|
||||
"/Humidity_Sensors/2022/01_Januar/2022-01-31_09.37.56_PC14751_Humidity_Sensors.dat",
|
||||
"/Humidity_Sensors/2022",
|
||||
"/Humidity_Sensors/2022/02_Februar",
|
||||
"/Humidity_Sensors/2022/02_Februar/2022-02-11_09.07.50_PC14751_Humidity_Sensors.dat",
|
||||
"/Humidity_Sensors/2022/02_Februar/2022-02-11_09.07.50_PC14751_Humidity_Sensors.dat",
|
||||
"/Humidity_Sensors/2022/02_Februar/2022-02-11_09.07.50_PC14751_Humidity_Sensors.dat",
|
||||
"/Humidity_Sensors/2022/02_Februar/2022-02-11_09.07.50_PC14751_Humidity_Sensors.dat",
|
||||
"/Humidity_Sensors/2022/02_Februar",
|
||||
"/Humidity_Sensors/2022/02_Februar/2022-02-11_16.46.26_PC10228_Humidity_Sensors.dat",
|
||||
"/Humidity_Sensors/2022/02_Februar/2022-02-11_16.46.26_PC10228_Humidity_Sensors.dat",
|
||||
"/Humidity_Sensors/2022/02_Februar/2022-02-11_16.46.26_PC10228_Humidity_Sensors.dat",
|
||||
"/Humidity_Sensors/2022/02_Februar/2022-02-11_16.46.26_PC10228_Humidity_Sensors.dat",
|
||||
"/Humidity_Sensors/2022",
|
||||
"/Humidity_Sensors/2022/03_März",
|
||||
"/Humidity_Sensors/2022/03_März/2022-03-14_09.05.01_PC14751_Humidity_Sensors.dat",
|
||||
"/Humidity_Sensors/2022/03_März/2022-03-14_09.05.01_PC14751_Humidity_Sensors.dat",
|
||||
"/Humidity_Sensors/2022/03_März/2022-03-14_09.05.01_PC14751_Humidity_Sensors.dat",
|
||||
"/Humidity_Sensors/2022/03_März/2022-03-14_09.05.01_PC14751_Humidity_Sensors.dat",
|
||||
"/Humidity_Sensors/2022",
|
||||
"/Humidity_Sensors/2022",
|
||||
"/Humidity_Sensors/2022",
|
||||
"/Humidity_Sensors/2022",
|
||||
"/Humidity_Sensors/2022",
|
||||
"/Humidity_Sensors/2022",
|
||||
"/Humidity_Sensors/2022",
|
||||
"/gas",
|
||||
"/gas/20220726_000004_MSC_gases.txt",
|
||||
"/gas/20220726_000004_MSC_gases.txt",
|
||||
"/gas/20220726_000004_MSC_gases.txt",
|
||||
"/gas/20220726_000004_MSC_gases.txt",
|
||||
"/gas",
|
||||
"/gas/20220726_101617_MSC_gases.txt",
|
||||
"/gas/20220726_101617_MSC_gases.txt",
|
||||
"/gas/20220726_101617_MSC_gases.txt",
|
||||
"/gas/20220726_101617_MSC_gases.txt",
|
||||
"/",
|
||||
"/ICAD",
|
||||
"/ICAD/HONO",
|
||||
"/ICAD/HONO/2022",
|
||||
"/ICAD/HONO/2022",
|
||||
"/ICAD/HONO/2022",
|
||||
"/ICAD",
|
||||
"/ICAD/HONO_prototype",
|
||||
"/ICAD/HONO_prototype/2022",
|
||||
"/ICAD/HONO_prototype/2022",
|
||||
"/ICAD",
|
||||
"/ICAD/NO2",
|
||||
"/ICAD/NO2/2022",
|
||||
"/ICAD/NO2/2022",
|
||||
"/ICAD/NO2/2022",
|
||||
"/ICAD/NO2/2022",
|
||||
"/ICAD/NO2/2022",
|
||||
"/ICAD/NO2/2022",
|
||||
"/ICAD/NO2/2022",
|
||||
"/ICAD/NO2/2022",
|
||||
"/ICAD/NO2/2022",
|
||||
"/ICAD/NO2/2022"
|
||||
"/smps",
|
||||
"/smps/20220726",
|
||||
"/smps/20220726/20220726_mass.TXT",
|
||||
"/smps/20220726/20220726_mass.TXT",
|
||||
"/smps/20220726/20220726_mass.TXT",
|
||||
"/smps/20220726/20220726_mass.TXT",
|
||||
"/smps/20220726",
|
||||
"/smps/20220726/20220726_num.TXT",
|
||||
"/smps/20220726/20220726_num.TXT",
|
||||
"/smps/20220726/20220726_num.TXT",
|
||||
"/smps/20220726/20220726_num.TXT"
|
||||
],
|
||||
"root": {
|
||||
"color": "lightgrey"
|
||||
},
|
||||
"type": "treemap",
|
||||
"values": [
|
||||
1,
|
||||
1,
|
||||
0,
|
||||
5,
|
||||
1,
|
||||
1,
|
||||
1,
|
||||
1,
|
||||
1,
|
||||
14,
|
||||
1,
|
||||
1,
|
||||
1,
|
||||
1,
|
||||
1,
|
||||
1,
|
||||
1,
|
||||
1,
|
||||
1,
|
||||
1,
|
||||
6,
|
||||
1,
|
||||
2,
|
||||
2,
|
||||
4,
|
||||
1,
|
||||
1,
|
||||
1,
|
||||
1,
|
||||
4,
|
||||
9,
|
||||
11,
|
||||
3,
|
||||
8,
|
||||
17,
|
||||
1,
|
||||
1,
|
||||
1,
|
||||
1,
|
||||
1,
|
||||
2,
|
||||
1,
|
||||
1,
|
||||
0,
|
||||
7,
|
||||
8,
|
||||
2,
|
||||
1,
|
||||
1,
|
||||
3,
|
||||
6,
|
||||
1,
|
||||
0,
|
||||
3,
|
||||
6,
|
||||
5,
|
||||
3,
|
||||
4,
|
||||
6,
|
||||
2,
|
||||
5,
|
||||
8,
|
||||
2
|
||||
1,
|
||||
1,
|
||||
1,
|
||||
1,
|
||||
4,
|
||||
1,
|
||||
1,
|
||||
1,
|
||||
1
|
||||
]
|
||||
}
|
||||
],
|
||||
@@ -1116,18 +1006,37 @@
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"import sys\n",
|
||||
"import os\n",
|
||||
"root_dir = os.path.abspath(os.curdir)\n",
|
||||
"sys.path.append(root_dir)\n",
|
||||
"#input_file_dir = '//fs101/5505/People/Juan/TypicalBeamTime'\n",
|
||||
"#select_file_keywords=[]\n",
|
||||
"#select_dir_keywords = ['NEXAFS', 'Notes', 'Photos', 'Pressure', 'RGA', 'SES']\n",
|
||||
"\n",
|
||||
"import src.hdf5_vis as hdf5_vis\n",
|
||||
"import src.hdf5_lib as hdf5_lib\n",
|
||||
"\n",
|
||||
"output_filename_path, output_yml_filename_path = hdf5_lib.main()\n",
|
||||
"#input_file_dir = '//fs101/5505/Data' \n",
|
||||
"#select_dir_keywords = ['Lopap', 'Humidity_Sensors/2022', 'ICAD/HONO/2022', 'ICAD/NO2/2022', 'T200_NOX', 'T360U_CO2']\n",
|
||||
"#select_file_keywords = ['2022-03-25','2022_03_25','20220325']\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"input_file_dir = '//fs03/Iron_Sulphate'\n",
|
||||
"select_dir_keywords = ['gas','smps/20220726']#,'htof/2022.07.26','ptr/2022.07.26','ams/2022.07.26']\n",
|
||||
"#select_dir_keywords = ['htof','ams', 'ptr', 'gas','smps'] \n",
|
||||
"\n",
|
||||
"select_file_keywords = ['20220726','2022.07.26']\n",
|
||||
"\n",
|
||||
"config_param = {'group_id' : group_id, 'user_initials' : user_initials, 'output_dir': output_dir}\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"output_filename_path, output_yml_filename_path = hdf5_lib.create_hdf5_file_from_filesystem_path(config_param,\n",
|
||||
" input_file_dir,\n",
|
||||
" select_dir_keywords,\n",
|
||||
" select_file_keywords)\n",
|
||||
"\n",
|
||||
"hdf5_vis.display_group_hierarchy_on_a_treemap(output_filename_path)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
|
workflow_data_owner_review.ipynb: new file, 160 lines
@@ -0,0 +1,160 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Import libraries and modules\n",
|
||||
"\n",
|
||||
"* Excecute (or Run) Cell"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import sys\n",
|
||||
"import os\n",
|
||||
"root_dir = os.path.abspath(os.curdir)\n",
|
||||
"sys.path.append(root_dir)\n",
|
||||
"\n",
|
||||
"import src.metadata_review_lib as metadata_review_lib"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Step 1: initialize metadata review.\n",
|
||||
"\n",
|
||||
"* Specify hdf5 file whose metadata is to be reviewed by editing the string variable `hdf5_file_path`.\n",
|
||||
"* Edit reviewer attributes, i.e., the dict variable `reviewer_attrs` with your own initials and role.\n",
|
||||
"* Excecute Cell."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Create branch metadata-review-by-NG\n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"ename": "ValueError",
|
||||
"evalue": "('Please checkout the branch: ', 'data-owner-review-by-NG', \" via Git Bash Terminal while in the project's directory\")",
|
||||
"output_type": "error",
|
||||
"traceback": [
|
||||
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
|
||||
"\u001b[1;31mValueError\u001b[0m Traceback (most recent call last)",
|
||||
"Cell \u001b[1;32mIn[2], line 12\u001b[0m\n\u001b[0;32m 7\u001b[0m reviewer_attrs \u001b[38;5;241m=\u001b[39m {\u001b[38;5;124m'\u001b[39m\u001b[38;5;124minitials\u001b[39m\u001b[38;5;124m'\u001b[39m: \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mNG\u001b[39m\u001b[38;5;124m'\u001b[39m,\n\u001b[0;32m 8\u001b[0m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mtype\u001b[39m\u001b[38;5;124m'\u001b[39m: \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mdata-owner\u001b[39m\u001b[38;5;124m'\u001b[39m}\n\u001b[0;32m 10\u001b[0m \u001b[38;5;66;03m#output_filename_path, output_yml_filename_path = hdf5_lib.main()\u001b[39;00m\n\u001b[1;32m---> 12\u001b[0m review_yaml_file_path \u001b[38;5;241m=\u001b[39m \u001b[43mmetadata_review_lib\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfirst_initialize_metadata_review\u001b[49m\u001b[43m(\u001b[49m\u001b[43mhdf5_file_path\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mreviewer_attrs\u001b[49m\u001b[43m)\u001b[49m \n\u001b[0;32m 14\u001b[0m \u001b[38;5;28mprint\u001b[39m(review_yaml_file_path) \n",
|
||||
"File \u001b[1;32mc:\\Users\\florez_j\\Documents\\GitLab\\functionspython\\src\\metadata_review_lib.py:89\u001b[0m, in \u001b[0;36mfirst_initialize_metadata_review\u001b[1;34m(hdf5_file_path, reviewer_attrs)\u001b[0m\n\u001b[0;32m 87\u001b[0m curr_branch \u001b[38;5;241m=\u001b[39m subprocess\u001b[38;5;241m.\u001b[39mrun(current_branch_command,capture_output\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m,text\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m,check\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m)\n\u001b[0;32m 88\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m branch_name \u001b[38;5;129;01min\u001b[39;00m curr_branch\u001b[38;5;241m.\u001b[39mstdout:\n\u001b[1;32m---> 89\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mPlease checkout the branch: \u001b[39m\u001b[38;5;124m\"\u001b[39m,branch_name,\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m via Git Bash Terminal while in the project\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124ms directory\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m 91\u001b[0m \u001b[38;5;66;03m# Check if review file already exists and then check if it is still untracked\u001b[39;00m\n\u001b[0;32m 92\u001b[0m review_yaml_file_path \u001b[38;5;241m=\u001b[39m os\u001b[38;5;241m.\u001b[39mpath\u001b[38;5;241m.\u001b[39mjoin(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mreview/\u001b[39m\u001b[38;5;124m\"\u001b[39m,filename\u001b[38;5;241m+\u001b[39mYAML_EXT)\n",
|
||||
"\u001b[1;31mValueError\u001b[0m: ('Please checkout the branch: ', 'data-owner-review-by-NG', \" via Git Bash Terminal while in the project's directory\")"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"\n",
|
||||
"#hdf5_file_path = \"output_files/unified_file_smog_chamber_2024-03-25_UTC-OFST_+0100_NG.h5\"\n",
|
||||
"#yml_file_path = \"output_files/unified_file_smog_chamber_2024-03-25_UTC-OFST_+0100_NG.yaml\"\n",
|
||||
"\n",
|
||||
"hdf5_file_path = \"output_files/unified_file_smog_chamber_2024-04-03_UTC-OFST_+0200_NG.h5\"\n",
|
||||
"yml_file_path = \"output_files/unified_file_smog_chamber_2024-04-03_UTC-OFST_+0200_NG.yaml\"\n",
|
||||
"\n",
|
||||
"reviewer_attrs = {'initials': 'NG',\n",
|
||||
" 'type': 'data-owner'}\n",
|
||||
"\n",
|
||||
"#output_filename_path, output_yml_filename_path = hdf5_lib.main()\n",
|
||||
"\n",
|
||||
"review_yaml_file_path = metadata_review_lib.first_initialize_metadata_review(hdf5_file_path, reviewer_attrs) \n",
|
||||
"\n",
|
||||
"print(review_yaml_file_path) "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Step 2: Submit metadata review. \n",
|
||||
"\n",
|
||||
"* Edit yaml file in review folder and save changes\n",
|
||||
"* Excecute Cell."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"metadata_review_lib.second_submit_metadata_review(review_yaml_file_path,reviewer_attrs)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Step 3: Update hdf5 file metadata w/ submitted review yaml file.\n",
|
||||
"\n",
|
||||
"* Make sure previous step was carried out properly.\n",
|
||||
"* Excecute Cell."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"\n",
|
||||
"metadata_review_lib.third_update_hdf5_file_with_review(hdf5_file_path, review_yaml_file_path, reviewer_attrs)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Step 4: Complete data-owner review. Update remote repository\n",
|
||||
"\n",
|
||||
"* Excecute Cell."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"metadata_review_lib.fourth_complete_metadata_review(reviewer_attrs)"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "test_atmos_chem_env",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.6"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
workflow_metadata_reviewer.ipynb: new file, 18 lines
@@ -0,0 +1,18 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"language_info": {
|
||||
"name": "python"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|