Moved openbis_lib.py to src folder.
This commit is contained in:
270
src/openbis_lib.py
Normal file
270
src/openbis_lib.py
Normal file
@ -0,0 +1,270 @@
|
|||||||
|
import pandas as pd
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
|
import datetime
|
||||||
|
from pybis import Openbis
|
||||||
|
import hidden
|
||||||
|
|
||||||
|
# Property names that single_sample_update() is allowed to write to an
# openBIS sample; any other key of a props dictionary is ignored there.
admissible_props_list = [
    '$name',
    'filenumber',
    'default_experiment.experimental_results',
    'dataquality',
    '$xmlcomments',
    '$annotations_state',
    'sample_name',
    'position_x',
    'position_y',
    'position_z',
    'temp',
    'cell_pressure',
    'gas_flow_setting',
    'sample_notes',
    'beamline',
    'photon_energy',
    'slit_entrance_v',
    'slit_exit_v',
    'izero',
    'slit_exit_h',
    'hos',
    'cone',
    'endstation',
    'hof',
    'method_name',
    'region',
    'lens_mode',
    'acq_mode',
    'dwell_time',
    'frames',
    'passenergy',
    'iterations',
    'sequenceiterations',
    'ke_range_center',
    'ke_step',
]
|
||||||
|
|
||||||
|
|
||||||
|
def initialize_openbis_obj():
    """Create an ``Openbis`` client and log in with the credentials in ``hidden``.

    Returns:
        The ``Openbis`` object on which ``login`` has been called.
    """
    # TODO: implement a more secure authentication method.
    # NOTE(review): certificate verification is switched off below -- confirm
    # that this is intended for this server.
    server_url = 'https://openbis-psi.labnotebook.ch/openbis/webapp/eln-lims/?menuUniqueId=null&viewName=showBlancPage&viewData=null'

    openbis_obj = Openbis(server_url, verify_certificates=False)
    openbis_obj.login(hidden.username, hidden.password)
    return openbis_obj
|
||||||
|
|
||||||
|
def align_datetime_observation_windows(df_h5: pd.DataFrame, df_openbis: pd.DataFrame, h5_datetime_var: str = 'lastModifiedDatestr', ob_datetime_var: str = 'registrationDate') -> "tuple[pd.DataFrame, pd.DataFrame]":
    """Return filtered copies of 'df_h5' and 'df_openbis' with aligned datetime observation windows.

    The overlapping datetime region starts at the later of the two minimum
    timestamps and ends at the earlier of the two maximum timestamps. Rows whose
    datetime value lies outside that (inclusive) region are dropped from both
    dataframes, and each result is sorted by its datetime column.

    Args:
        df_h5: dataframe holding the column 'h5_datetime_var'.
        df_openbis: dataframe holding the column 'ob_datetime_var'.
        h5_datetime_var: name of the datetime column in 'df_h5'.
        ob_datetime_var: name of the datetime column in 'df_openbis'.

    Returns:
        The tuple (df_h5, df_openbis) of filtered and sorted dataframes, whose
        datetime columns have been converted to 'datetime64[ns]'.

    Raises:
        ValueError: if either datetime column is missing from its dataframe.
    """
    if h5_datetime_var not in df_h5.columns or ob_datetime_var not in df_openbis.columns:
        # TODO: Check if ValueError is the best type of error to raise here
        raise ValueError(
            "Dataframes 'df_h5' and 'df_openbis' must contain columns '{}' and '{}', "
            "storing values in suitable datetime string format (e.g., yyyy-mm-dd hh:mm:ss).".format(
                h5_datetime_var, ob_datetime_var))

    # Work on copies so that the dtype conversion below does not overwrite
    # columns of the dataframes owned by the caller.
    df_h5 = df_h5.copy()
    df_openbis = df_openbis.copy()

    df_h5[h5_datetime_var] = df_h5[h5_datetime_var].astype('datetime64[ns]')
    df_openbis[ob_datetime_var] = df_openbis[ob_datetime_var].astype('datetime64[ns]')

    # Overlap window: latest start and earliest end of the two observation windows.
    min_timestamp = max([df_openbis[ob_datetime_var].min(), df_h5[h5_datetime_var].min()])
    max_timestamp = min([df_openbis[ob_datetime_var].max(), df_h5[h5_datetime_var].max()])

    # Keep only the rows of each dataframe that fall inside the overlap window.
    h5_in_window = (df_h5[h5_datetime_var] >= min_timestamp) & (df_h5[h5_datetime_var] <= max_timestamp)
    df_h5 = df_h5.loc[h5_in_window, :]

    ob_in_window = (df_openbis[ob_datetime_var] >= min_timestamp) & (df_openbis[ob_datetime_var] <= max_timestamp)
    df_openbis = df_openbis.loc[ob_in_window, :]

    df_h5 = df_h5.sort_values(by=h5_datetime_var)
    df_openbis = df_openbis.sort_values(by=ob_datetime_var)

    return df_h5, df_openbis
|
||||||
|
|
||||||
|
def reformat_openbis_dataframe_filenumber(df_openbis):
    """Add the column 'REFORMATED_FILENUMBER' to 'df_openbis' and return the dataframe.

    Each value of the 'FILENUMBER' column is zero-padded to three characters,
    repeated twice and prefixed with '0' (e.g., '5' becomes '0005005').
    The column is added to 'df_openbis' in place; the same object is returned.

    Raises:
        ValueError: if 'df_openbis' has no 'FILENUMBER' column.
    """
    if 'FILENUMBER' not in df_openbis.columns:
        raise ValueError('df_openbis does not contain the column "FILENUMBER". Make sure you query (e.g., o.get_samples(props=["filenumber"])) that before creating df_openbis.')

    # Augment df_openbis with a name column consistent with Thorsten's naming convention.
    # FILENUMBER values are expected to be strings (str.zfill is called on them).
    name_list = ['0' + item.zfill(3) + item.zfill(3) for item in df_openbis['FILENUMBER']]
    df_openbis['REFORMATED_FILENUMBER'] = pd.Series(name_list, index=df_openbis.index)

    return df_openbis
|
||||||
|
|
||||||
|
def pair_openbis_and_h5_dataframes(df_openbis, df_h5, pairing_ob_var: str, pairing_h5_var: str):
    """Pair every row (openbis sample) in 'df_openbis' with a set of rows (measurements) in 'df_h5'.

    The i-th row of 'df_openbis' is paired with all rows of 'df_h5' for which the
    string df_openbis.loc[i, pairing_ob_var] is contained in the part of
    df_h5[pairing_h5_var] that precedes the first underscore (the whole string
    when it has no underscore).

    Example: pairing_ob_var, pairing_h5_var = 'REFORMATED_FILENUMBER', 'name'

    Returns:
        'df_openbis' augmented with the columns 'REFORMATED_FILENUMBER' and
        'related_h5_indices'; the latter stores, per sample, the index labels
        of the related rows of 'df_h5'.
    """
    # Reformat openbis dataframe filenumber so that it can be used to find associated measurements in h5 dataframe
    df_openbis = reformat_openbis_dataframe_filenumber(df_openbis)

    # The prefixes do not depend on the sample, so compute them once. split() keeps
    # the full string when there is no underscore, whereas slicing up to
    # str.find('_') == -1 would silently drop the last character.
    h5_prefixes = [item.split('_', 1)[0] for item in df_h5[pairing_h5_var]]

    related_indices_list = []
    for sample_idx in df_openbis.index:
        sample_value = df_openbis.loc[sample_idx, pairing_ob_var]
        is_related = [sample_value in prefix for prefix in h5_prefixes]
        related_indices = df_h5.index[is_related]
        related_indices_list.append(related_indices)

        print('Paring openbis sample: ' + sample_value)
        print('with reformated FILENUMBER: ' + sample_value)
        print('to following measurements in h5 dataframe:')
        print(df_h5.loc[related_indices, pairing_h5_var])
        print('\n')

    df_openbis['related_h5_indices'] = pd.Series(related_indices_list, index=df_openbis.index)

    return df_openbis
|
||||||
|
|
||||||
|
|
||||||
|
def range_cols_2_string(df,lb_var,ub_var):
    """Summarise the value ranges stored in two columns of 'df' as a single string.

    Values are formatted with one decimal place and rows are joined with '/':
      * if any row has different lower and upper bounds, every row is written as 'lb-ub';
      * otherwise, if the lower bounds differ between rows, every row is written as 'lb';
      * otherwise a single 'lb' value is returned.

    Args:
        df: dataframe holding the two columns.
        lb_var: name of the lower-bound column.
        ub_var: name of the upper-bound column.

    Returns:
        The summary string; the empty string if 'df' has no rows.
    """
    lower = df.loc[:, lb_var]
    upper = df.loc[:, ub_var]

    if lower.empty:
        return ''

    # Compare row by row: summing the differences would report "no range"
    # whenever positive and negative differences cancel out.
    if (upper != lower).any():
        tmp_list = ['-'.join(["{:.1f}".format(lb), "{:.1f}".format(ub)]) for lb, ub in zip(lower, upper)]
    elif len(lower.unique()) > 1:  # check if values are different
        tmp_list = ["{:.1f}".format(lb) for lb in lower]
    else:
        # Positional access: the index of 'df' need not contain the label 0.
        tmp_list = ["{:.1f}".format(lower.iloc[0])]

    return '/'.join(tmp_list)
|
||||||
|
|
||||||
|
def col_2_string(df,column_var):
    """Format the values of one column of 'df' as a '/'-separated string.

    Every value is written with two decimal places. When the column holds a
    single distinct value, only that value is returned.

    Raises:
        ValueError: if 'column_var' is not a column of 'df'.
    """
    if column_var not in df.columns:
        raise ValueError("'column var must belong in df.columns")

    values = df[column_var]
    formatted = [format(value, '.2f') for value in values]

    # A constant column collapses to its single value.
    if values.unique().size == 1:
        return formatted[0]

    return '/'.join(formatted)
|
||||||
|
|
||||||
|
|
||||||
|
def compute_openbis_sample_props_from_h5(df_openbis, df_h5, sample_idx):
    """Compute the openbis properties of one sample from its related measurements in 'df_h5'.

    Args:
        df_openbis: dataframe with the columns 'identifier', 'FILENUMBER' and
            'related_h5_indices'.
        df_h5: dataframe of measurements, indexed by the labels stored in
            'related_h5_indices'.
        sample_idx: index label of the sample (row of 'df_openbis') to process.

    Returns:
        A dictionary of property values.
          * If the sample has no related measurements, it only holds
            'FILENUMBER' and 'identifier'.
          * If 'NEXAFS' is not contained in the first related 'regionName', the
            sample is labelled 'XPS' and the dictionary additionally holds
            'Subject_samples', 'sample_name' and 'region'.
          * Otherwise the dictionary is restarted with only 'identifier' and
            'method_name' ('NEXAFS').
        In the last two cases the combined measurement properties (temp,
        cell_pressure, dwell_time, passenergy, ke_step, photon_energy,
        ke_range_center, lens_mode, acq_mode, position_x/y/z) are appended as
        formatted strings.
    """
    sample_identifier = df_openbis.loc[sample_idx, 'identifier']
    related_h5_indices = df_openbis.loc[sample_idx, 'related_h5_indices']
    props_dict = {'FILENUMBER': df_openbis.loc[sample_idx, 'FILENUMBER']}

    # Nothing to combine when no measurement was paired with this sample.
    if not len(related_h5_indices):
        props_dict['identifier'] = sample_identifier
        return props_dict

    # reset_index() gives the related measurements the positional labels 0..n-1.
    reduced_df_h5 = df_h5.loc[related_h5_indices, :].reset_index()

    # include related_samples key for validation purposes. Related samples are used to compute average and/or combined openbis properties.
    props_dict['Subject_samples'] = ' / '.join(reduced_df_h5['name'].tolist())

    sample_names = reduced_df_h5['sample'].unique()
    if len(sample_names) == 1:
        props_dict['sample_name'] = sample_names[0]
    else:
        props_dict['sample_name'] = '/'.join(reduced_df_h5['sample'].tolist())

    region_names = reduced_df_h5['regionName']
    if 'NEXAFS' not in region_names.iloc[0]:
        props_dict['identifier'] = sample_identifier
        props_dict['method_name'] = 'XPS'
        # Region is the part of 'regionName' before the first underscore. split()
        # keeps the whole name when there is no underscore, whereas slicing up to
        # str.find('_') == -1 would drop the last character.
        props_dict['region'] = '/'.join(item.split('_', 1)[0] for item in region_names)
    else:
        props_dict = {'identifier': sample_identifier, 'method_name': 'NEXAFS'}

    props_dict['temp'] = "{:.2f}".format(reduced_df_h5['sampleTemp_dC'].mean())
    props_dict['cell_pressure'] = "{:.2f}".format(reduced_df_h5['cellPressure_mbar'].mean())

    reduced_df_h5['scientaDwellTime_ms'] = reduced_df_h5['scientaDwellTime_ms']*1e-3  # convert ms to seconds
    props_dict['dwell_time'] = col_2_string(reduced_df_h5, 'scientaDwellTime_ms')
    props_dict['passenergy'] = col_2_string(reduced_df_h5, 'scientaPassEnergy_eV')

    ke_step = col_2_string(reduced_df_h5, 'scientaEkinStep_eV')
    # 'ke_step' is the name listed in admissible_props_list; 'ke_step_center'
    # is kept as well for callers that read the previously used key.
    props_dict['ke_step'] = ke_step
    props_dict['ke_step_center'] = ke_step

    props_dict['photon_energy'] = range_cols_2_string(reduced_df_h5, 'xRayEkinRange_eV_1', 'xRayEkinRange_eV_2')
    props_dict['ke_range_center'] = range_cols_2_string(reduced_df_h5, 'scientaEkinRange_eV_1', 'scientaEkinRange_eV_2')

    # Lens and acquisition mode are taken from the first related measurement.
    props_dict['lens_mode'] = reduced_df_h5['scientaLensMode'][0]
    props_dict['acq_mode'] = reduced_df_h5['scientaAcquisitionMode'][0]

    props_dict['position_x'] = "{:.2f}".format(reduced_df_h5.loc[:, 'smplX_mm'].mean())
    props_dict['position_y'] = "{:.2f}".format(reduced_df_h5.loc[:, 'smplY_mm'].mean())
    props_dict['position_z'] = "{:.2f}".format(reduced_df_h5.loc[:, 'smplZ_mm'].mean())

    return props_dict
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def single_sample_update(sample_props_dict,sample_collection,props_include_list):
    """Update the sample in the openbis database specified in sample_props_dict.

    The sample is looked up in 'sample_collection' (i.e., the result of
    openbis_obj.get_samples(...)) with the path-like value stored under the key
    'identifier'. Only properties that are both in 'admissible_props_list' and
    in 'props_include_list' are written before the sample is saved.

    Any error raised during lookup, assignment or saving is logged together
    with its traceback and is not propagated (best-effort update).

    Returns:
        Always 0.
    """
    sample_path_identifier = None
    try:
        sample_path_identifier = sample_props_dict['identifier']  # path-like index
        sample = sample_collection[sample_path_identifier]
        for prop, value in sample_props_dict.items():
            if (prop in admissible_props_list) and (prop in props_include_list):
                sample.props[prop] = value
        sample.save()
    except Exception:
        # logging.exception records the raised error and its traceback;
        # passing the Exception class itself would only log "<class 'Exception'>".
        logging.exception("Failed to update openbis sample %r", sample_path_identifier)

    return 0
|
||||||
|
|
||||||
|
|
||||||
|
def sample_batch_update(openbis_obj,sample_collection,df_openbis,df_h5,props_include_list):
    """Update all samples listed in 'df_openbis' within a single openbis transaction.

    For every row of 'df_openbis' the properties are computed with
    compute_openbis_sample_props_from_h5(); those named in 'props_include_list'
    are assigned to the matching sample of 'sample_collection', and the sample
    is added to a transaction that is committed once at the end.

    Args:
        openbis_obj: object providing new_transaction().
        sample_collection: collection of samples, indexable by sample identifier.
        df_openbis: dataframe with an 'identifier' and a 'related_h5_indices' column.
        df_h5: dataframe of measurements referenced by 'related_h5_indices'.
        props_include_list: names of the properties to update. The entries
            'filenumber', 'FILENUMBER' and 'identifier' are ignored; the list
            itself is not modified.

    Returns:
        Always 0.

    Raises:
        ValueError: if 'df_openbis' has no 'related_h5_indices' column.
    """
    if 'related_h5_indices' not in df_openbis.columns:
        raise ValueError("Input dataframe 'df_openbis' must contain a column named 'related_h5_indices', resulting from suitable preprocessing steps.")

    # Safeguard: properties that must not be changed. A filtered copy is built
    # because removing items from a list while iterating over it skips elements
    # and would also alter the caller's list.
    exclude_list = ['filenumber', 'FILENUMBER', 'identifier']
    props_to_update = [item for item in props_include_list if item not in exclude_list]

    trans = openbis_obj.new_transaction()
    # Iterate over index labels: the properties are looked up with .loc.
    for sample_idx in df_openbis.index:

        props_dict = compute_openbis_sample_props_from_h5(df_openbis, df_h5, sample_idx)
        sample_path_identifier = props_dict['identifier']  # path-like index
        sample = sample_collection[sample_path_identifier]

        for prop, value in props_dict.items():
            if prop in props_to_update:
                sample.props[prop] = value

        trans.add(sample)

    trans.commit()

    return 0
|
||||||
|
|
||||||
|
def conduct_dataframe_preprocessing_steps(df_h5, df_openbis):
    """Run the preprocessing steps that prepare 'df_h5' and 'df_openbis' for a sample update.

    The two dataframes are first restricted to their common datetime window
    ('lastModifiedDatestr' vs. 'registrationDate'), then every openbis sample is
    paired with its measurements by matching 'REFORMATED_FILENUMBER' against
    the 'name' column of 'df_h5'.

    Returns:
        The tuple (df_h5, df_openbis) of preprocessed dataframes.

    Raises:
        ValueError: if 'df_h5' has no 'lastModifiedDatestr' column.
    """
    if 'lastModifiedDatestr' not in df_h5.columns:
        raise ValueError("Input dataframe 'df_h5' must contain the column 'lastModifiedDatestr'.")

    df_h5, df_openbis = align_datetime_observation_windows(df_h5, df_openbis, 'lastModifiedDatestr', 'registrationDate')
    df_openbis = pair_openbis_and_h5_dataframes(df_openbis, df_h5, 'REFORMATED_FILENUMBER', 'name')

    return df_h5, df_openbis
|
||||||
|
|
||||||
|
|
Reference in New Issue
Block a user