Synch with remote repo

2025-02-03 10:31:48 +01:00
parent a3ccff4079
commit 32bba4239a
102 changed files with 19584 additions and 19584 deletions

View File

@@ -1,151 +1,151 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"from nbutils import add_project_path_to_sys_path\n",
"\n",
"\n",
"# Add project root to sys.path\n",
"add_project_path_to_sys_path()\n",
"\n",
"import pandas as pd\n",
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
"\n",
"try:\n",
" import src.hdf5_writer as hdf5_writer\n",
" import src.hdf5_ops as hdf5_ops\n",
" import visualization.hdf5_vis as h5vis\n",
" import visualization.napp_plotlib as napp\n",
"\n",
" import utils.g5505_utils as utils\n",
" #import pipelines.metadata_revision as metadata_revision\n",
" print(\"Imports successful!\")\n",
"except ImportError as e:\n",
" print(f\"Import error: {e}\")\n",
"\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Read the above specified input_file_path as a dataframe. \n",
"\n",
"Since we know this file was created from a Thorsten Table's format, we can use h5lib.read_mtable_as_dataframe() to read it.\n",
"\n",
"Then, we rename the 'name' column as 'filename', as this is the column's name use to idenfify files in subsequent functions.\n",
"Also, we augment the dataframe with a few categorical columns to be used as grouping variables when creating the hdf5 file's group hierarchy. "
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Define input file directory\n",
"\n",
"input_file_path = '../input_files/BeamTimeMetaData.h5'\n",
"output_dir_path = '../output_files'\n",
"if not os.path.exists(output_dir_path):\n",
" os.makedirs(output_dir_path)\n",
"\n",
"# Read BeamTimeMetaData.h5, containing Thorsten's Matlab Table\n",
"input_data_df = hdf5_ops.read_mtable_as_dataframe(input_file_path)\n",
"\n",
"# Preprocess Thorsten's input_data dataframe so that i can be used to create a newer .h5 file\n",
"# under certain grouping specificiations.\n",
"input_data_df = input_data_df.rename(columns = {'name':'filename'})\n",
"input_data_df = utils.augment_with_filenumber(input_data_df)\n",
"input_data_df = utils.augment_with_filetype(input_data_df)\n",
"input_data_df = utils.split_sample_col_into_sample_and_data_quality_cols(input_data_df)\n",
"input_data_df['lastModifiedDatestr'] = input_data_df['lastModifiedDatestr'].astype('datetime64[s]')\n",
"\n",
"input_data_df.columns\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"We now create a hdf5 file with a 3-level group hierarchy based on the input_data and three grouping functions. Then\n",
"we visualize the group hierarchy of the created file as a treemap."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Define grouping functions to be passed into create_hdf5_file function. These can also be set\n",
"# as strings refering to categorical columns in input_data_df.\n",
"\n",
"test_grouping_funcs = True\n",
"if test_grouping_funcs:\n",
" group_by_sample = lambda x : utils.group_by_df_column(x,'sample')\n",
" group_by_type = lambda x : utils.group_by_df_column(x,'filetype')\n",
" group_by_filenumber = lambda x : utils.group_by_df_column(x,'filenumber')\n",
"else:\n",
" group_by_sample = 'sample'\n",
" group_by_type = 'filetype'\n",
" group_by_filenumber = 'filenumber'\n",
"\n",
"import pandas as pd\n",
"import h5py\n",
"\n",
"path_to_output_filename = os.path.normpath(os.path.join(output_dir_path, 'test.h5'))\n",
"\n",
"grouping_by_vars = ['sample', 'filenumber']\n",
"\n",
"path_to_output_filename = hdf5_writer.create_hdf5_file_from_dataframe(path_to_output_filename, \n",
" input_data_df, \n",
" grouping_by_vars\n",
" )\n",
"\n",
"annotation_dict = {'Campaign name': 'SLS-Campaign-2023',\n",
" 'Producers':'Thorsten, Luca, Zoe',\n",
" 'Startdate': str(input_data_df['lastModifiedDatestr'].min()),\n",
" 'Enddate': str(input_data_df['lastModifiedDatestr'].max())\n",
" }\n",
"\n",
"dataOpsObj = hdf5_ops.HDF5DataOpsManager(path_to_output_filename)\n",
"dataOpsObj.load_file_obj()\n",
"# Annotate root folder with annotation_dict\n",
"dataOpsObj.append_metadata('/',annotation_dict)\n",
"dataOpsObj.unload_file_obj()\n",
"\n",
"\n",
"\n",
"h5vis.display_group_hierarchy_on_a_treemap(path_to_output_filename)\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "multiphase_chemistry_env",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.9"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
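The grouping idea in the notebook above, turning categorical columns such as 'sample' and 'filenumber' into an HDF5 group hierarchy, can be illustrated without the project code. The following is a minimal, hypothetical sketch using plain pandas and h5py; it is not the hdf5_writer.create_hdf5_file_from_dataframe implementation, and the example dataframe values are invented, but the column names match the notebook.

import h5py
import pandas as pd

# Illustrative stand-in for input_data_df (values are invented; column names follow the notebook)
df = pd.DataFrame({
    'filename':   ['a.ibw', 'b.ibw', 'c.ibw'],
    'sample':     ['NaCl', 'NaCl', 'KCl'],
    'filenumber': ['001', '002', '003'],
})

def write_grouped_hierarchy(path, df, grouping_vars):
    """Create one HDF5 group per combination of the grouping columns, e.g. /NaCl/001."""
    with h5py.File(path, 'w') as f:
        for _, row in df.iterrows():
            group_path = '/'.join(str(row[col]) for col in grouping_vars)
            grp = f.require_group(group_path)
            grp.attrs['filename'] = row['filename']
    return path

write_grouped_hierarchy('grouping_illustration.h5', df, ['sample', 'filenumber'])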

View File

@@ -1,182 +1,182 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Data integration workflow of experimental campaign\n",
"\n",
"In this notebook, we will go through a our data integration workflow. This involves the following steps:\n",
"\n",
"1. Specify data integration file through YAML configuration file.\n",
"2. Create an integrated HDF5 file of experimental campaign from configuration file.\n",
"3. Display the created HDF5 file using a treemap\n",
"\n",
"## Import libraries and modules\n",
"\n",
"* Excecute (or Run) the Cell below"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from nbutils import add_project_path_to_sys_path\n",
"\n",
"# Add project root to sys.path\n",
"add_project_path_to_sys_path()\n",
"\n",
"try:\n",
" import visualization.hdf5_vis as hdf5_vis\n",
" import pipelines.data_integration as data_integration\n",
" print(\"Imports successful!\")\n",
"except ImportError as e:\n",
" print(f\"Import error: {e}\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Step 1: Specify data integration task through YAML configuration file\n",
"\n",
"* Create your configuration file (i.e., *.yaml file) adhering to the example yaml file in the input folder.\n",
"* Set up input directory and output directory paths and Excecute Cell.\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"#output_filename_path = 'output_files/unified_file_smog_chamber_2024-04-07_UTC-OFST_+0200_NG.h5'\n",
"yaml_config_file_path = '../input_files/data_integr_config_file_TBR.yaml'\n",
"\n",
"#path_to_input_directory = 'output_files/kinetic_flowtube_study_2022-01-31_LuciaI'\n",
"#path_to_hdf5_file = hdf5_lib.create_hdf5_file_from_filesystem_path(path_to_input_directory)\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Step 2: Create an integrated HDF5 file of experimental campaign.\n",
"\n",
"* Excecute Cell. Here we run the function `integrate_data_sources` with input argument as the previously specified YAML config file."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"\n",
"hdf5_file_path = data_integration.run_pipeline(yaml_config_file_path)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"hdf5_file_path "
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Display integrated HDF5 file using a treemap\n",
"\n",
"* Excecute Cell. A visual representation in html format of the integrated file should be displayed and stored in the output directory folder"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"\n",
"if isinstance(hdf5_file_path ,list):\n",
" for path_item in hdf5_file_path :\n",
" hdf5_vis.display_group_hierarchy_on_a_treemap(path_item)\n",
"else:\n",
" hdf5_vis.display_group_hierarchy_on_a_treemap(hdf5_file_path)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import src.hdf5_ops as h5de \n",
"h5de.serialize_metadata(hdf5_file_path[0],folder_depth=3,output_format='yaml')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import src.hdf5_ops as h5de \n",
"print(hdf5_file_path)\n",
"DataOpsAPI = h5de.HDF5DataOpsManager(hdf5_file_path[0])\n",
"\n",
"DataOpsAPI.load_file_obj()\n",
"\n",
"#DataOpsAPI.reformat_datetime_column('ICAD/HONO/2022_11_22_Channel1_Data.dat/data_table',\n",
"# 'Start Date/Time (UTC)',\n",
"# '%Y-%m-%d %H:%M:%S.%f', '%Y-%m-%d %H:%M:%S')\n",
"DataOpsAPI.extract_and_load_dataset_metadata()\n",
"df = DataOpsAPI.dataset_metadata_df\n",
"print(df.head())\n",
"\n",
"DataOpsAPI.unload_file_obj()\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"DataOpsAPI.load_file_obj()\n",
"\n",
"DataOpsAPI.append_metadata('/',{'test_attr':'this is a test value'})\n",
"\n",
"DataOpsAPI.unload_file_obj()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "multiphase_chemistry_env",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.9"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
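As a rough illustration of the treemap step above, the sketch below shows how an HDF5 group hierarchy could be rendered as a treemap with h5py and plotly.express. It is only an assumed, simplified stand-in for hdf5_vis.display_group_hierarchy_on_a_treemap, not the project's implementation.

import h5py
import plotly.express as px

def treemap_of_groups(hdf5_path):
    """Collect all group paths in the file and plot their parent/child relations as a treemap."""
    names, parents = ['/'], ['']
    with h5py.File(hdf5_path, 'r') as f:
        def visit(name, obj):
            if isinstance(obj, h5py.Group):
                names.append('/' + name)
                parents.append('/' + name.rsplit('/', 1)[0] if '/' in name else '/')
        f.visititems(visit)  # datasets are skipped; only the group hierarchy is shown
    fig = px.treemap(names=names, parents=parents)
    fig.show()

# Example usage with the path(s) returned by run_pipeline:
# treemap_of_groups(hdf5_file_path[0] if isinstance(hdf5_file_path, list) else hdf5_file_path)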

View File

@@ -1,79 +1,79 @@
import os
from nbutils import add_project_path_to_sys_path

# Add project root to sys.path
add_project_path_to_sys_path()

import datetime
import logging

try:
    import src.openbis_lib as openbis_lib
    import src.hdf5_ops as hdf5_ops
    #import pipelines.metadata_revision as metadata_revision
    print("Imports successful!")
except ImportError as e:
    print(f"Import error: {e}")


def main():
    #df_h5 = hdf5_lib.read_hdf5_as_dataframe_v2('BeamTimeMetaData.h5')
    #df_h5['lastModifiedDatestr'] = df_h5['lastModifiedDatestr'].astype('datetime64[ns]')
    #df_h5 = df_h5.sort_values(by='lastModifiedDatestr')

    openbis_obj = openbis_lib.initialize_openbis_obj()

    # Create df with sample measurements of type 'ISS_MEASUREMENT'
    samples = openbis_obj.get_samples(type='ISS_MEASUREMENT', props=['FILENUMBER'])
    for sample in samples:
        print(type(sample))
        print(sample.identifier)

    df_openbis = samples.df.copy(deep=True)

    h5_file_path = os.path.join(os.path.curdir, 'input_files\\BeamTimeMetaData.h5')
    df_h5 = hdf5_ops.read_mtable_as_dataframe(h5_file_path)

    # dataframe preprocessing steps
    df_h5, df_openbis = openbis_lib.align_datetime_observation_windows(df_h5, df_openbis)
    df_openbis = openbis_lib.pair_openbis_and_h5_dataframes(df_openbis, df_h5, 'REFORMATED_FILENUMBER', 'name')

    current_date = datetime.date.today()
    log_filename = 'logs\\computed_openbis_props_logs_' + current_date.strftime('%d-%m-%Y') + '.log'
    logging_flag = True

    #logger = logging.getLogger(__name__)
    #logger.setLevel(logging.DEBUG)
    log_file_path = os.path.join(os.path.curdir, log_filename)
    logging.basicConfig(filename=log_file_path,
                        level=logging.DEBUG,
                        format="%(asctime)s %(levelname)s %(message)s",
                        datefmt="%d-%m-%Y %H:%M:%S",
                        )

    for sample_idx in df_openbis.index:
        # logging.basicConfig(log_filename)
        #print(formatted_dict)
        sample_props_dict = openbis_lib.compute_openbis_sample_props_from_h5(df_openbis, df_h5, sample_idx)
        formatted_dict = [f"{key}:{value}" for key, value in sample_props_dict.items()]
        formatted_dict = "\n".join(formatted_dict)
        logging.debug('\n' + formatted_dict)
        #print(props_dict)

    openbis_obj.logout()

    # Choose samples and specific properties to update: create a log


if __name__ == "__main__":
    main()
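One practical caveat in the script above: logging.basicConfig opens the log file immediately, so it fails if the logs directory does not yet exist. A small, hedged variant of the logging setup (same relative layout assumed) that creates the directory first:

import os
import logging

log_dir = os.path.join(os.path.curdir, 'logs')
os.makedirs(log_dir, exist_ok=True)  # make sure logs/ exists before basicConfig opens the file

logging.basicConfig(
    filename=os.path.join(log_dir, 'computed_openbis_props_logs.log'),
    level=logging.DEBUG,
    format="%(asctime)s %(levelname)s %(message)s",
    datefmt="%d-%m-%Y %H:%M:%S",
)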

View File

@@ -1,96 +1,96 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"from nbutils import add_project_path_to_sys_path\n",
"\n",
"\n",
"# Add project root to sys.path\n",
"add_project_path_to_sys_path()\n",
"\n",
"import pandas as pd\n",
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
"\n",
"try:\n",
" import src.hdf5_ops as hdf5_ops\n",
" import visualization.napp_plotlib as napp\n",
" #import pipelines.metadata_revision as metadata_revision\n",
" print(\"Imports successful!\")\n",
"except ImportError as e:\n",
" print(f\"Import error: {e}\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Define h5 file name and make sure file is located at the current working dir\n",
"filename = '../input_files/FileList_v2.h5'\n",
"\n",
"# Read h5 file into dataframe\n",
"dataframe = hdf5_ops.read_mtable_as_dataframe(filename)\n",
"\n",
"\n",
"dataframe['lastModifiedDatestr']\n",
"print(dataframe.columns)\n",
"\n",
"dataframe.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"dataframe['image'][0].shape\n",
"\n",
"name_filter = (dataframe['name'] == '0116116_Cl2p_750eV.ibw').to_numpy()\n",
"date_filter = np.array(['Jun-2023' in date for date in dataframe['lastModifiedDatestr']])\n",
"\n",
"filter = np.logical_and(name_filter.flatten(),date_filter.flatten()) \n",
"\n",
"napp.plot_image(dataframe,filter)\n",
"napp.plot_spectra(dataframe,filter)\n",
"\n",
"name_filter = np.array(['merge' in name for name in dataframe['name'] ])\n",
"date_filter = np.array(['Jun-2023' in date for date in dataframe['lastModifiedDatestr']])\n",
"filter = np.logical_and(name_filter.flatten(),date_filter.flatten()) \n",
"\n",
"\n",
"napp.plot_spectra(dataframe,filter)\n",
"\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "multiphase_chemistry_env",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.9"
},
"orig_nbformat": 4
},
"nbformat": 4,
"nbformat_minor": 2
}
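The boolean masks in the last cell are built with Python list comprehensions and np.logical_and. If the 'name' and 'lastModifiedDatestr' columns hold plain strings, the same filters can be written with pandas' vectorized string methods; a small equivalent sketch with made-up data:

import pandas as pd

# Invented example rows; only the column names match the notebook
df = pd.DataFrame({
    'name': ['0116116_Cl2p_750eV.ibw', 'scan_merge.ibw'],
    'lastModifiedDatestr': ['01-Jun-2023 10:00:00', '15-Jul-2023 12:00:00'],
})

name_filter = df['name'].str.contains('merge', regex=False)
date_filter = df['lastModifiedDatestr'].str.contains('Jun-2023', regex=False)
mask = name_filter & date_filter  # equivalent to np.logical_and on the flattened arrays

print(df[mask])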

View File

@@ -1,98 +1,98 @@
import os
from nbutils import add_project_path_to_sys_path

# Add project root to sys.path
add_project_path_to_sys_path()

import datetime
import logging

try:
    import src.openbis_lib as openbis_lib
    import src.hdf5_ops as hdf5_ops
    #import pipelines.metadata_revision as metadata_revision
    print("Imports successful!")
except ImportError as e:
    print(f"Import error: {e}")


def main():
    #df_h5 = hdf5_lib.read_hdf5_as_dataframe_v2('BeamTimeMetaData.h5')
    #df_h5['lastModifiedDatestr'] = df_h5['lastModifiedDatestr'].astype('datetime64[ns]')
    #df_h5 = df_h5.sort_values(by='lastModifiedDatestr')

    openbis_obj = openbis_lib.initialize_openbis_obj()

    # Create df with sample measurements of type 'ISS_MEASUREMENT'
    samples = openbis_obj.get_samples(type='ISS_MEASUREMENT', props=['FILENUMBER'])
    for sample in samples:
        print(type(sample))
        print(sample.identifier)

    df_openbis = samples.df.copy(deep=True)

    h5_file_path = os.path.join(os.path.curdir, 'input_files\\BeamTimeMetaData.h5')
    df_h5 = hdf5_ops.read_mtable_as_dataframe(h5_file_path)

    # dataframe preprocessing steps
    df_h5, df_openbis = openbis_lib.align_datetime_observation_windows(df_h5, df_openbis)
    df_openbis = openbis_lib.pair_openbis_and_h5_dataframes(df_openbis, df_h5, 'REFORMATED_FILENUMBER', 'name')

    current_date = datetime.date.today()
    log_filename = 'logs\\computed_openbis_props_logs_' + current_date.strftime('%d-%m-%Y') + '.log'
    logging_flag = True

    #logger = logging.getLogger(__name__)
    #logger.setLevel(logging.DEBUG)
    log_file_path = os.path.join(os.path.curdir, log_filename)
    logging.basicConfig(filename=log_file_path,
                        level=logging.DEBUG,
                        format="%(asctime)s %(levelname)s %(message)s",
                        datefmt="%d-%m-%Y %H:%M:%S",
                        )

    # update sample properties in openbis database only if they are labeled as bad
    # Note: only the last of these assignments takes effect; the earlier lists are kept
    # here as alternative property selections.
    props_include_list = ['sample_name', 'temp', 'cell_pressure', 'method_name', 'region', 'lens_mode', 'acq_mode', 'dwell_time']
    props_include_list = ['ke_range_center', 'ke_range_step']
    props_include_list = ['temp', 'cell_pressure', 'photon_energy', 'dwell_time', 'passenergy', 'ke_range_center', 'ke_step_center', 'position_x', 'position_y', 'position_z']
    props_include_list = ['position_x', 'position_y', 'position_z']
    props_include_list = ['temp', 'cell_pressure', 'photon_energy', 'dwell_time', 'passenergy', 'ke_range_center', 'ke_step_center']

    for sample_idx in df_openbis.index:
        # logging.basicConfig(log_filename)
        #print(formatted_dict)
        sample_props_dict = openbis_lib.compute_openbis_sample_props_from_h5(df_openbis, df_h5, sample_idx)
        #sample_props_dict[ke_range_center]
        formatted_dict = [f"{key}:{value}" for key, value in sample_props_dict.items()]
        formatted_dict = "\n".join(formatted_dict)
        logging.debug('\n' + formatted_dict)
        try:
            filenumber = -1 if sample_props_dict['FILENUMBER'] == '' else int(sample_props_dict['FILENUMBER'])
            if filenumber >= 85:
                print(filenumber)
                #if 'bad' in sample_props_dict['sample_name']:
                logging.info('The above sample is to be updated in openbis:')
                openbis_lib.single_sample_update(sample_props_dict, samples, props_include_list)
        except KeyError:
            logging.error(KeyError)
        #print(props_dict)

    openbis_obj.logout()

    # Choose samples and specific properties to update: create a log


if __name__ == "__main__":
    main()
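Because only the last props_include_list assignment above takes effect, the earlier selections are silently ignored. A hedged alternative (the selection names are illustrative, not part of the project) is to keep the candidate selections in a dictionary and pick the active one explicitly:

# Hypothetical refactor of the repeated assignments: name each selection, choose one deliberately.
PROP_SELECTIONS = {
    'acquisition': ['temp', 'cell_pressure', 'photon_energy', 'dwell_time',
                    'passenergy', 'ke_range_center', 'ke_step_center'],
    'positions':   ['position_x', 'position_y', 'position_z'],
    'kinetic':     ['ke_range_center', 'ke_range_step'],
}

props_include_list = PROP_SELECTIONS['acquisition']  # the selection actually used below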

View File

@@ -1,172 +1,172 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Metadata Annotation Process\n",
"\n",
"In this notebook, we will go through a simple metadata annotation process. This involves the following steps:\n",
"\n",
"1. Define an HDF5 file.\n",
"2. Create a YAML representation of the HDF5 file.\n",
"3. Edit and augment the YAML with metadata.\n",
"4. Update the original file based on the edited YAML.\n",
"\n",
"\n",
"## Import libraries and modules\n",
"\n",
"* Excecute (or Run) the Cell below"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Imports successful!\n"
]
}
],
"source": [
"import os\n",
"from nbutils import add_project_path_to_sys_path\n",
"\n",
"\n",
"# Add project root to sys.path\n",
"add_project_path_to_sys_path()\n",
"\n",
"try:\n",
" import src.hdf5_ops as hdf5_ops\n",
" import pipelines.metadata_revision as metadata_revision\n",
" print(\"Imports successful!\")\n",
"except ImportError as e:\n",
" print(f\"Import error: {e}\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Step 1: Define an HDF5 file\n",
"\n",
"* Set up the string variable `hdf5_file_path` with the path to the HDF5 file of interest.\n",
"* Excecute Cell."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"hdf5_file_path = \"../output_files/collection_kinetic_flowtube_study_LuciaI_2022-01-31_2023-06-29/kinetic_flowtube_study_LuciaI_2023-06-29.h5\""
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Step 2: Create a YAML Representation of the File\n",
"\n",
"We now convert HDF5 file structure and existing metadata into a YAML format. This will be used to add and edit metadata attributes.\n",
"\n",
"* Excecute Cell."
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"The YAML file representation output_files/collection_kinetic_flowtube_study_LuciaI_2022-01-31_2023-06-29/kinetic_flowtube_study_LuciaI_2023-06-29.json of the HDF5 file output_files/collection_kinetic_flowtube_study_LuciaI_2022-01-31_2023-06-29/kinetic_flowtube_study_LuciaI_2023-06-29.h5 was created successfully.\n"
]
}
],
"source": [
"yaml_file_path = hdf5_ops.serialize_metadata(hdf5_file_path,output_format='json')\n",
"\n",
"if os.path.exists(yaml_file_path):\n",
" print(f'The YAML file representation {yaml_file_path} of the HDF5 file {hdf5_file_path} was created successfully.')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Step 3: Edit and Augment YAML with Metadata\n",
"\n",
"We can now manually edit the YAML file to add metadata.\n",
"* (Optional) automate your metadata annotation process by creating a program that takes the YAMl file and returns the modified version of it.\n",
"* Excecute Cell."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def metadata_annotation_process(yaml_file_path):\n",
"\n",
" # Include metadata annotation logic, e.g., load yaml file and modify its content accordingly\n",
"\n",
" print(f'Ensure your edits to {yaml_file_path} have been properly incorporated and saved.')\n",
"\n",
" return yaml_file_path\n",
"\n",
"yaml_file_path = metadata_annotation_process(yaml_file_path)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Step 4: Update the Original File Based on the Edited YAML\n",
"\n",
"Lastly, we will update the original file with the metadata from the YAML file.\n",
"\n",
"* Excecute Cell."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"\n",
"metadata_revision.update_hdf5_file_with_review(hdf5_file_path,yaml_file_path)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "multiphase_chemistry_env",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.9"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
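For Step 3, an automated metadata_annotation_process could look roughly like the sketch below. It assumes the serialized file is YAML, that PyYAML is installed, and that the top level is a mapping into which extra attributes can be merged; the exact structure produced by hdf5_ops.serialize_metadata may differ, so treat this as a template rather than the project's implementation.

import yaml

def metadata_annotation_process(yaml_file_path, extra_attributes=None):
    """Load the serialized metadata, merge in extra attributes, and write the file back."""
    # Example values only; replace with the metadata you actually want to add
    extra_attributes = extra_attributes or {'Campaign name': 'SLS-Campaign-2023'}

    with open(yaml_file_path, 'r') as f:
        metadata = yaml.safe_load(f) or {}

    metadata.update(extra_attributes)  # assumes a top-level mapping

    with open(yaml_file_path, 'w') as f:
        yaml.safe_dump(metadata, f, sort_keys=False)

    return yaml_file_path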

View File

@@ -1,16 +1,16 @@
import sys
import os


def add_project_path_to_sys_path():
    """
    Adds the project path (root directory containing the package) to sys.path.
    """
    # Determine the root directory (project_root, which contains 'dima')
    notebook_dir = os.getcwd()  # Current working directory (assumes running from notebooks/)
    project_path = os.path.normpath(os.path.join(notebook_dir, ".."))  # Move up to project root

    if project_path not in sys.path:  # Avoid duplicate entries
        sys.path.append(project_path)


if __name__ == "__main__":
    add_project_path_to_sys_path()
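A pathlib-based variant of the same helper, shown only as an alternative sketch under the same assumption that notebooks run one directory below the project root:

import sys
from pathlib import Path

def add_project_path_to_sys_path():
    """Prepend the project root (parent of the current working directory) to sys.path."""
    project_path = str(Path.cwd().parent.resolve())
    if project_path not in sys.path:
        sys.path.insert(0, project_path)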