Mirror of https://gitea.psi.ch/APOG/acsmnode.git (synced 2025-06-27 20:11:09 +02:00)
Remove the dataset_name parameter from the command-line interface. The dataset name is now inferred from the HDF5 file, assuming it contains a consistently named input file that can be located with hard-coded keywords.
@@ -60,6 +60,8 @@ def compute_calibration_factors(data_table, datetime_var_name, calibration_param
     """
 
     calibration_factors_dict = {}
+
+    calibration_factors_dict = {datetime_var_name : data_table[datetime_var_name].to_numpy()}
 
     for variable_name in calibration_factors['variables']:
         #tmp = np.empty(shape=data_table[datetime_var_name].to_numpy().shape)
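The two added lines change what the function returns: the dict that becomes the output DataFrame now starts from the timestamp column instead of empty, so every factor column built in the loop comes back row-aligned with the timestamps it applies to. A minimal sketch of the resulting structure, with illustrative column names and values (not taken from the repository):

import numpy as np
import pandas as pd

# Stand-in data table: one datetime column plus one species variable.
data_table = pd.DataFrame({
    'datetime': pd.date_range('2024-01-01', periods=3, freq='h'),
    'Org': [1.0, 2.0, 4.0],
})

# Seed the dict with the timestamp column, as the updated function does,
# then add one factor column per calibrated variable.
calibration_factors_dict = {'datetime': data_table['datetime'].to_numpy()}
calibration_factors_dict['factor_Org'] = np.full(len(data_table), 1.2)

factor_table = pd.DataFrame(data=calibration_factors_dict)
print(factor_table.columns.tolist())  # ['datetime', 'factor_Org']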
@@ -86,7 +88,7 @@ def compute_calibration_factors(data_table, datetime_var_name, calibration_param
             else:
                 raise ValueError(f"Invalid calibration interval: start_datetime {t1} must be before end_datetime {t2}")
 
-        calibration_factors_dict[variable_name] = tmp
+        calibration_factors_dict[f'factor_{variable_name}'] = tmp
 
     return pd.DataFrame(data=calibration_factors_dict)
 
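Renaming the output columns from <variable> to factor_<variable> keeps the factor columns clearly distinguishable from the data variables they scale, presumably to avoid ambiguity now that the factor table also carries the timestamp column; the matching change on the consumer side appears in the apply_calibration_factors hunk below.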
@@ -158,7 +160,7 @@ def apply_calibration_factors(data_table, datetime_var_name, calibration_file):
         if variable in calibration_factors['variables'].keys(): # use standard calibration factor
 
             # Apply calibration to each variable
-            new_data_table[variable] = new_data_table[variable].mul(calibration_factor_table[variable])
+            new_data_table[variable] = new_data_table[variable].mul(calibration_factor_table[f'factor_{variable}'])
 
             # Add renaming entry
             variable_rename_dict[variable] = f"{variable}_correct"
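Both sides of the contract in one self-contained sketch: compute_calibration_factors writes factor_<variable> columns, and the apply step must read them back with the same prefix. Values here are made up; only the naming convention matters.

import pandas as pd

# A data table and a factor table that share a row index.
data = pd.DataFrame({'Org': [1.0, 2.0, 4.0]})
factor_table = pd.DataFrame({'factor_Org': [2.0, 2.0, 0.5]})

variable = 'Org'
# Multiply each variable by its 'factor_<variable>' column; pandas aligns
# the two Series on their shared index.
data[variable] = data[variable].mul(factor_table[f'factor_{variable}'])
print(data[variable].tolist())  # [2.0, 4.0, 2.0]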
@@ -182,7 +184,7 @@ if __name__ == '__main__':
    # Set up argument parsing
    parser = argparse.ArgumentParser(description="Calibrate species data using calibration factors.")
    parser.add_argument('data_file', type=str, help="Path to the input HDF5 file containing the data table.")
-   parser.add_argument('dataset_name', type=str, help ='Relative path to data_table (i.e., dataset name) in HDF5 file')
+   #parser.add_argument('dataset_name', type=str, help ='Relative path to data_table (i.e., dataset name) in HDF5 file')
    parser.add_argument('calibration_file', type=str, help="Path to the input YAML file containing calibration factors.")
    #parser.add_argument('output_file', type=str, help="Path to save the output calibrated data as a CSV file.")
 
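With the positional dataset_name argument commented out, the script now takes only the data file and the calibration file. A hypothetical invocation (the script and file names are illustrative):

python calibrate_data.py acsm_data.h5 calibration_factors.yaml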
@@ -196,20 +198,36 @@ if __name__ == '__main__':
 
    dataManager = dataOps.HDF5DataOpsManager(args.data_file)
    dataManager.load_file_obj()
-   dataset_name = '/'+args.dataset_name
+
+   dataManager.extract_and_load_dataset_metadata()
+   dataset_metadata_df = dataManager.dataset_metadata_df.copy()
+
+
+   keywords = ['ACSM_TOFWARE/','ACSM_JFJ_','_timeseries.txt/data_table']
+   find_keyword = [all(keyword in item for keyword in keywords) for item in dataset_metadata_df['dataset_name']]
+
+   if sum(find_keyword)!=1:
+       input_file_name = '<year>'.join(keywords)
+       raise RuntimeError(f'Input file {input_file_name} was neither found nor uniquely identified.')
+
+   dataset_name = dataset_metadata_df['dataset_name'][find_keyword].values[0]
+   parent_file = dataset_metadata_df.loc[find_keyword,'parent_file'].values[0]
+   parent_instrument = dataset_metadata_df.loc[find_keyword,'parent_instrument'].values[0]
+
+   #dataset_name = '/'+args.dataset_name
    data_table = dataManager.extract_dataset_as_dataframe(dataset_name)
    datetime_var, datetime_format = dataManager.infer_datetime_variable(dataset_name)
 
    #data_table['t_start_Buf'] = data_table['t_start_Buf'].apply(lambda x : x.decode())
 
-   dataManager.extract_and_load_dataset_metadata()
-   dataset_metadata_df = dataManager.dataset_metadata_df.copy()
+   #dataManager.extract_and_load_dataset_metadata()
+   #dataset_metadata_df = dataManager.dataset_metadata_df.copy()
    print(dataset_metadata_df.head())
 
-   dataset_name_idx = dataset_metadata_df.index[(dataset_metadata_df['dataset_name']==args.dataset_name).to_numpy()]
-   data_table_metadata = dataset_metadata_df.loc[dataset_name_idx,:]
-   parent_instrument = data_table_metadata.loc[dataset_name_idx,'parent_instrument'].values[0]
-   parent_file = data_table_metadata.loc[dataset_name_idx,'parent_file'].values[0]
+   #dataset_name_idx = dataset_metadata_df.index[(dataset_metadata_df['dataset_name']==args.dataset_name).to_numpy()]
+   #data_table_metadata = dataset_metadata_df.loc[dataset_name_idx,:]
+   #parent_instrument = data_table_metadata.loc[dataset_name_idx,'parent_instrument'].values[0]
+   #parent_file = data_table_metadata.loc[dataset_name_idx,'parent_file'].values[0]
 
    print(parent_file)
 
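The inference logic in isolation: a dataset qualifies only if its name contains every hard-coded keyword, and exactly one dataset may qualify, otherwise the script aborts. A runnable sketch, with illustrative dataset names standing in for the metadata table extracted from the HDF5 file:

import pandas as pd

dataset_metadata_df = pd.DataFrame({'dataset_name': [
    'ACSM_TOFWARE/2024/ACSM_JFJ_2024_timeseries.txt/data_table',
    'ACSM_TOFWARE/2024/ACSM_JFJ_2024_meta.txt/data_table',
]})

keywords = ['ACSM_TOFWARE/', 'ACSM_JFJ_', '_timeseries.txt/data_table']
find_keyword = [all(keyword in item for keyword in keywords)
                for item in dataset_metadata_df['dataset_name']]

# Zero matches and ambiguous matches both abort, since neither identifies
# a unique input file.
if sum(find_keyword) != 1:
    input_file_name = '<year>'.join(keywords)
    raise RuntimeError(f'Input file {input_file_name} was neither found nor uniquely identified.')

dataset_name = dataset_metadata_df['dataset_name'][find_keyword].values[0]
print(dataset_name)  # the unique matching dataset path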
@@ -228,6 +246,7 @@ if __name__ == '__main__':
 
 
 
+
    # Perform calibration
    try:
        # Define output directory of apply_calibration_factors() step
@@ -254,7 +273,8 @@ if __name__ == '__main__':
 
    metadata = {'actris_level' : 1,
                'processing_script': processingScriptRelPath.replace(os.sep,'/'),
-               'processing_date' : utils.created_at()}
+               'processing_date' : utils.created_at(),
+               'datetime_var': datetime_var}
 
    # Save output tables to csv file and save/or update data lineage record
    filename, ext = os.path.splitext(parent_file)
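Adding datetime_var to the lineage metadata records which column holds the timestamps, presumably so downstream steps can reuse it without calling infer_datetime_variable again.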