Sync with remote repo
@ -1,151 +1,151 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"from nbutils import add_project_path_to_sys_path\n",
"\n",
"\n",
"# Add project root to sys.path\n",
"add_project_path_to_sys_path()\n",
"\n",
"import pandas as pd\n",
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
"\n",
"try:\n",
" import src.hdf5_writer as hdf5_writer\n",
" import src.hdf5_ops as hdf5_ops\n",
" import visualization.hdf5_vis as h5vis\n",
" import visualization.napp_plotlib as napp\n",
"\n",
" import utils.g5505_utils as utils\n",
" #import pipelines.metadata_revision as metadata_revision\n",
" print(\"Imports successful!\")\n",
"except ImportError as e:\n",
" print(f\"Import error: {e}\")\n",
"\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Read the specified input_file_path as a dataframe.\n",
"\n",
"Since we know this file was created from Thorsten's MATLAB table format, we can use hdf5_ops.read_mtable_as_dataframe() to read it.\n",
"\n",
"Then, we rename the 'name' column as 'filename', as this is the column name used to identify files in subsequent functions.\n",
"Also, we augment the dataframe with a few categorical columns to be used as grouping variables when creating the HDF5 file's group hierarchy."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Define input file directory\n",
"\n",
"input_file_path = '../input_files/BeamTimeMetaData.h5'\n",
"output_dir_path = '../output_files'\n",
"if not os.path.exists(output_dir_path):\n",
" os.makedirs(output_dir_path)\n",
"\n",
"# Read BeamTimeMetaData.h5, containing Thorsten's Matlab Table\n",
"input_data_df = hdf5_ops.read_mtable_as_dataframe(input_file_path)\n",
"\n",
"# Preprocess Thorsten's input_data dataframe so that it can be used to create a new .h5 file\n",
"# under certain grouping specifications.\n",
"input_data_df = input_data_df.rename(columns = {'name':'filename'})\n",
"input_data_df = utils.augment_with_filenumber(input_data_df)\n",
"input_data_df = utils.augment_with_filetype(input_data_df)\n",
"input_data_df = utils.split_sample_col_into_sample_and_data_quality_cols(input_data_df)\n",
"input_data_df['lastModifiedDatestr'] = input_data_df['lastModifiedDatestr'].astype('datetime64[s]')\n",
"\n",
"input_data_df.columns\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"We now create an HDF5 file with a 3-level group hierarchy based on the input_data and three grouping functions. Then\n",
"we visualize the group hierarchy of the created file as a treemap."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Define grouping functions to be passed into the create_hdf5_file_from_dataframe function. These can also be set\n",
"# as strings referring to categorical columns in input_data_df.\n",
"\n",
"test_grouping_funcs = True\n",
"if test_grouping_funcs:\n",
" group_by_sample = lambda x : utils.group_by_df_column(x,'sample')\n",
" group_by_type = lambda x : utils.group_by_df_column(x,'filetype')\n",
" group_by_filenumber = lambda x : utils.group_by_df_column(x,'filenumber')\n",
"else:\n",
" group_by_sample = 'sample'\n",
" group_by_type = 'filetype'\n",
" group_by_filenumber = 'filenumber'\n",
"\n",
"import pandas as pd\n",
"import h5py\n",
"\n",
"path_to_output_filename = os.path.normpath(os.path.join(output_dir_path, 'test.h5'))\n",
"\n",
"grouping_by_vars = ['sample', 'filenumber']\n",
"\n",
"path_to_output_filename = hdf5_writer.create_hdf5_file_from_dataframe(path_to_output_filename, \n",
" input_data_df, \n",
" grouping_by_vars\n",
" )\n",
"\n",
"annotation_dict = {'Campaign name': 'SLS-Campaign-2023',\n",
" 'Producers':'Thorsten, Luca, Zoe',\n",
" 'Startdate': str(input_data_df['lastModifiedDatestr'].min()),\n",
" 'Enddate': str(input_data_df['lastModifiedDatestr'].max())\n",
" }\n",
"\n",
"dataOpsObj = hdf5_ops.HDF5DataOpsManager(path_to_output_filename)\n",
"dataOpsObj.load_file_obj()\n",
"# Annotate root folder with annotation_dict\n",
"dataOpsObj.append_metadata('/',annotation_dict)\n",
"dataOpsObj.unload_file_obj()\n",
"\n",
"\n",
"\n",
"h5vis.display_group_hierarchy_on_a_treemap(path_to_output_filename)\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "multiphase_chemistry_env",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.9"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
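Note on the grouping helpers used above: utils.group_by_df_column(df, column) belongs to this project and its implementation is not part of this commit. The following is a minimal, purely illustrative sketch of what a column-based grouping helper could look like; the function body and behavior are assumptions, not the project's actual code.

import pandas as pd

def group_by_df_column(df: pd.DataFrame, column_name: str) -> pd.Series:
    # Hypothetical sketch: return the labels of `column_name` as strings,
    # which a writer could use to assign each row (file) to an HDF5 group.
    if column_name not in df.columns:
        raise ValueError(f"Column '{column_name}' not found in dataframe")
    return df[column_name].astype(str)

# Example usage with a toy dataframe:
toy = pd.DataFrame({'filename': ['a.ibw', 'b.dat'], 'sample': ['S1', 'S2']})
print(group_by_df_column(toy, 'sample').tolist())  # ['S1', 'S2']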
@ -1,182 +1,182 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Data integration workflow of experimental campaign\n",
"\n",
"In this notebook, we will go through our data integration workflow. This involves the following steps:\n",
"\n",
"1. Specify the data integration task through a YAML configuration file.\n",
"2. Create an integrated HDF5 file of the experimental campaign from the configuration file.\n",
"3. Display the created HDF5 file using a treemap.\n",
"\n",
"## Import libraries and modules\n",
"\n",
"* Execute (or Run) the Cell below."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from nbutils import add_project_path_to_sys_path\n",
"\n",
"# Add project root to sys.path\n",
"add_project_path_to_sys_path()\n",
"\n",
"try:\n",
" import visualization.hdf5_vis as hdf5_vis\n",
" import pipelines.data_integration as data_integration\n",
" print(\"Imports successful!\")\n",
"except ImportError as e:\n",
" print(f\"Import error: {e}\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Step 1: Specify data integration task through YAML configuration file\n",
"\n",
"* Create your configuration file (i.e., a *.yaml file) adhering to the example YAML file in the input folder.\n",
"* Set up the input and output directory paths and Execute Cell.\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"#output_filename_path = 'output_files/unified_file_smog_chamber_2024-04-07_UTC-OFST_+0200_NG.h5'\n",
"yaml_config_file_path = '../input_files/data_integr_config_file_TBR.yaml'\n",
"\n",
"#path_to_input_directory = 'output_files/kinetic_flowtube_study_2022-01-31_LuciaI'\n",
"#path_to_hdf5_file = hdf5_lib.create_hdf5_file_from_filesystem_path(path_to_input_directory)\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Step 2: Create an integrated HDF5 file of the experimental campaign\n",
"\n",
"* Execute Cell. Here we run the function `run_pipeline` with the previously specified YAML config file as its input argument."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"\n",
"hdf5_file_path = data_integration.run_pipeline(yaml_config_file_path)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"hdf5_file_path "
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Step 3: Display the integrated HDF5 file using a treemap\n",
"\n",
"* Execute Cell. A visual representation of the integrated file, in HTML format, should be displayed and stored in the output directory."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"\n",
"if isinstance(hdf5_file_path ,list):\n",
" for path_item in hdf5_file_path :\n",
" hdf5_vis.display_group_hierarchy_on_a_treemap(path_item)\n",
"else:\n",
" hdf5_vis.display_group_hierarchy_on_a_treemap(hdf5_file_path)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import src.hdf5_ops as h5de \n",
"h5de.serialize_metadata(hdf5_file_path[0],folder_depth=3,output_format='yaml')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import src.hdf5_ops as h5de \n",
"print(hdf5_file_path)\n",
"DataOpsAPI = h5de.HDF5DataOpsManager(hdf5_file_path[0])\n",
"\n",
"DataOpsAPI.load_file_obj()\n",
"\n",
"#DataOpsAPI.reformat_datetime_column('ICAD/HONO/2022_11_22_Channel1_Data.dat/data_table',\n",
"# 'Start Date/Time (UTC)',\n",
"# '%Y-%m-%d %H:%M:%S.%f', '%Y-%m-%d %H:%M:%S')\n",
"DataOpsAPI.extract_and_load_dataset_metadata()\n",
"df = DataOpsAPI.dataset_metadata_df\n",
"print(df.head())\n",
"\n",
"DataOpsAPI.unload_file_obj()\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"DataOpsAPI.load_file_obj()\n",
"\n",
"DataOpsAPI.append_metadata('/',{'test_attr':'this is a test value'})\n",
"\n",
"DataOpsAPI.unload_file_obj()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "multiphase_chemistry_env",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.9"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
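Step 1 of the notebook above only assigns the config path. A small defensive check, not part of the notebook itself and shown here only as a sketch, can catch a mistyped path before Step 2 runs the pipeline:

import os

yaml_config_file_path = '../input_files/data_integr_config_file_TBR.yaml'
# Fail early with a clear message instead of letting run_pipeline fail later.
if not os.path.exists(yaml_config_file_path):
    raise FileNotFoundError(f"Config file not found: {yaml_config_file_path}")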
@ -1,79 +1,79 @@
import os
from nbutils import add_project_path_to_sys_path


# Add project root to sys.path
add_project_path_to_sys_path()

import datetime
import logging

try:
    import src.openbis_lib as openbis_lib
    import src.hdf5_ops as hdf5_ops
    #import pipelines.metadata_revision as metadata_revision
    print("Imports successful!")
except ImportError as e:
    print(f"Import error: {e}")

def main():

    #df_h5 = hdf5_lib.read_hdf5_as_dataframe_v2('BeamTimeMetaData.h5')
    #df_h5['lastModifiedDatestr'] = df_h5['lastModifiedDatestr'].astype('datetime64[ns]')
    #df_h5 = df_h5.sort_values(by='lastModifiedDatestr')


    openbis_obj = openbis_lib.initialize_openbis_obj()

    # Create df with sample measurements of type 'ISS_MEASUREMENT'
    samples = openbis_obj.get_samples(type='ISS_MEASUREMENT',props=['FILENUMBER'])
    for sample in samples:
        print(type(sample))
        print(sample.identifier)
    df_openbis = samples.df.copy(deep=True)
    h5_file_path = os.path.join(os.path.curdir,'input_files\\BeamTimeMetaData.h5')

    df_h5 = hdf5_ops.read_mtable_as_dataframe(h5_file_path)

    # dataframe preprocessing steps
    df_h5, df_openbis = openbis_lib.align_datetime_observation_windows(df_h5, df_openbis)
    df_openbis = openbis_lib.pair_openbis_and_h5_dataframes(df_openbis, df_h5, 'REFORMATED_FILENUMBER', 'name')


    current_date = datetime.date.today()
    log_filename = 'logs\\computed_openbis_props_logs_' + current_date.strftime('%d-%m-%Y') + '.log'
    logging_flag = True

    #logger = logging.getLogger(__name__)
    #logger.setLevel(logging.DEBUG)

    log_file_path = os.path.join(os.path.curdir,log_filename)

    logging.basicConfig(filename=log_file_path,
                        level=logging.DEBUG,
                        format="%(asctime)s %(levelname)s %(message)s",
                        datefmt="%d-%m-%Y %H:%M:%S",
                        )

    for sample_idx in df_openbis.index:

        # logging.basicConfig(log_filename)
        #print(formatted_dict)
        sample_props_dict = openbis_lib.compute_openbis_sample_props_from_h5(df_openbis, df_h5, sample_idx)

        formatted_dict = [f"{key}:{value}" for key, value in sample_props_dict.items()]
        formatted_dict = "\n".join(formatted_dict)

        logging.debug('\n'+formatted_dict)


    #print(props_dict)
    openbis_obj.logout()

    # Choose samples and specific properties to update: create a log


if __name__=="__main__":
    main()
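Note that the hard-coded backslashes in 'logs\\...' and 'input_files\\BeamTimeMetaData.h5' tie the script above to Windows path conventions. A portable variant, shown here only as a sketch and not as a change to the committed script, lets os.path.join pick the separator:

import os
import datetime

# Sketch: build the same log path without embedding a platform-specific separator.
current_date = datetime.date.today()
log_filename = 'computed_openbis_props_logs_' + current_date.strftime('%d-%m-%Y') + '.log'
log_file_path = os.path.join(os.path.curdir, 'logs', log_filename)
print(log_file_path)  # e.g. ./logs/computed_openbis_props_logs_01-01-2024.log on POSIX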
@ -1,96 +1,96 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"from nbutils import add_project_path_to_sys_path\n",
"\n",
"\n",
"# Add project root to sys.path\n",
"add_project_path_to_sys_path()\n",
"\n",
"import pandas as pd\n",
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
"\n",
"try:\n",
" import src.hdf5_ops as hdf5_ops\n",
" import visualization.napp_plotlib as napp\n",
" #import pipelines.metadata_revision as metadata_revision\n",
" print(\"Imports successful!\")\n",
"except ImportError as e:\n",
" print(f\"Import error: {e}\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Define h5 file name and make sure file is located at the current working dir\n",
"filename = '../input_files/FileList_v2.h5'\n",
"\n",
"# Read h5 file into dataframe\n",
"dataframe = hdf5_ops.read_mtable_as_dataframe(filename)\n",
"\n",
"\n",
"dataframe['lastModifiedDatestr']\n",
"print(dataframe.columns)\n",
"\n",
"dataframe.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"dataframe['image'][0].shape\n",
"\n",
"name_filter = (dataframe['name'] == '0116116_Cl2p_750eV.ibw').to_numpy()\n",
"date_filter = np.array(['Jun-2023' in date for date in dataframe['lastModifiedDatestr']])\n",
"\n",
"filter = np.logical_and(name_filter.flatten(),date_filter.flatten()) \n",
"\n",
"napp.plot_image(dataframe,filter)\n",
"napp.plot_spectra(dataframe,filter)\n",
"\n",
"name_filter = np.array(['merge' in name for name in dataframe['name'] ])\n",
"date_filter = np.array(['Jun-2023' in date for date in dataframe['lastModifiedDatestr']])\n",
"filter = np.logical_and(name_filter.flatten(),date_filter.flatten()) \n",
"\n",
"\n",
"napp.plot_spectra(dataframe,filter)\n",
"\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "multiphase_chemistry_env",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.9"
},
"orig_nbformat": 4
},
"nbformat": 4,
"nbformat_minor": 2
}
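The last cell of the notebook above combines two boolean masks with np.logical_and to select rows for plotting. The same pattern can be reproduced on any dataframe with string columns; the sketch below uses toy data, not the contents of FileList_v2.h5.

import numpy as np
import pandas as pd

# Toy stand-in for the 'name' and 'lastModifiedDatestr' columns used above.
df = pd.DataFrame({
    'name': ['0116116_Cl2p_750eV.ibw', 'scan_merge.ibw', 'other.dat'],
    'lastModifiedDatestr': ['12-Jun-2023', '30-Jun-2023', '01-May-2023'],
})

name_mask = df['name'].str.contains('merge').to_numpy()
date_mask = np.array(['Jun-2023' in d for d in df['lastModifiedDatestr']])
row_mask = np.logical_and(name_mask, date_mask)
print(df[row_mask])  # only the June 2023 'merge' file survives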
@ -1,98 +1,98 @@
import os
from nbutils import add_project_path_to_sys_path


# Add project root to sys.path
add_project_path_to_sys_path()

import datetime
import logging

try:
    import src.openbis_lib as openbis_lib
    import src.hdf5_ops as hdf5_ops
    #import pipelines.metadata_revision as metadata_revision
    print("Imports successful!")
except ImportError as e:
    print(f"Import error: {e}")


def main():

    #df_h5 = hdf5_lib.read_hdf5_as_dataframe_v2('BeamTimeMetaData.h5')
    #df_h5['lastModifiedDatestr'] = df_h5['lastModifiedDatestr'].astype('datetime64[ns]')
    #df_h5 = df_h5.sort_values(by='lastModifiedDatestr')


    openbis_obj = openbis_lib.initialize_openbis_obj()

    # Create df with sample measurements of type 'ISS_MEASUREMENT'
    samples = openbis_obj.get_samples(type='ISS_MEASUREMENT',props=['FILENUMBER'])
    for sample in samples:
        print(type(sample))
        print(sample.identifier)
    df_openbis = samples.df.copy(deep=True)
    h5_file_path = os.path.join(os.path.curdir,'input_files\\BeamTimeMetaData.h5')
    df_h5 = hdf5_ops.read_mtable_as_dataframe(h5_file_path)

    # dataframe preprocessing steps
    df_h5, df_openbis = openbis_lib.align_datetime_observation_windows(df_h5, df_openbis)
    df_openbis = openbis_lib.pair_openbis_and_h5_dataframes(df_openbis, df_h5, 'REFORMATED_FILENUMBER', 'name')


    current_date = datetime.date.today()
    log_filename = 'logs\\computed_openbis_props_logs_' + current_date.strftime('%d-%m-%Y') + '.log'
    logging_flag = True

    #logger = logging.getLogger(__name__)
    #logger.setLevel(logging.DEBUG)

    log_file_path = os.path.join(os.path.curdir,log_filename)

    logging.basicConfig(filename=log_file_path,
                        level=logging.DEBUG,
                        format="%(asctime)s %(levelname)s %(message)s",
                        datefmt="%d-%m-%Y %H:%M:%S",
                        )

    # update sample properties in openbis database only if they are labeled as bad

    props_include_list = ['sample_name', 'temp', 'cell_pressure','method_name', 'region', 'lens_mode', 'acq_mode', 'dwell_time']
    props_include_list = ['ke_range_center','ke_range_step']
    props_include_list = [ 'temp', 'cell_pressure','photon_energy','dwell_time','passenergy','ke_range_center','ke_step_center','position_x','position_y','position_z']

    props_include_list = ['position_x','position_y','position_z']
    props_include_list = [ 'temp', 'cell_pressure','photon_energy','dwell_time','passenergy','ke_range_center','ke_step_center']


    for sample_idx in df_openbis.index:

        # logging.basicConfig(log_filename)
        #print(formatted_dict)
        sample_props_dict = openbis_lib.compute_openbis_sample_props_from_h5(df_openbis, df_h5, sample_idx)

        #sample_props_dict[ke_range_center]

        formatted_dict = [f"{key}:{value}" for key, value in sample_props_dict.items()]
        formatted_dict = "\n".join(formatted_dict)
        logging.debug('\n'+formatted_dict)
        try:
            filenumber = -1 if sample_props_dict['FILENUMBER'] == '' else int(sample_props_dict['FILENUMBER'])

            if filenumber >= 85 :
                print(filenumber)
                #if 'bad' in sample_props_dict['sample_name']:
                logging.info('The above sample is to be updated in openbis:')
                openbis_lib.single_sample_update(sample_props_dict,samples,props_include_list)
        except KeyError:
            logging.error(KeyError)
    #print(props_dict)
    openbis_obj.logout()

    # Choose samples and specific properties to update: create a log


if __name__=="__main__":
    main()
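A small reminder on the repeated props_include_list assignments in the script above: each assignment rebinds the name, so only the final list (temp, cell_pressure, photon_energy, dwell_time, passenergy, ke_range_center, ke_step_center) is actually passed to single_sample_update; the earlier lists are discarded. For example:

# Sequential rebinding keeps only the final value; earlier lists are discarded.
props_include_list = ['sample_name', 'temp']
props_include_list = ['position_x', 'position_y', 'position_z']
props_include_list = ['temp', 'cell_pressure', 'photon_energy']
print(props_include_list)  # ['temp', 'cell_pressure', 'photon_energy']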
@ -1,172 +1,172 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Metadata Annotation Process\n",
"\n",
"In this notebook, we will go through a simple metadata annotation process. This involves the following steps:\n",
"\n",
"1. Define an HDF5 file.\n",
"2. Create a YAML representation of the HDF5 file.\n",
"3. Edit and augment the YAML with metadata.\n",
"4. Update the original file based on the edited YAML.\n",
"\n",
"\n",
"## Import libraries and modules\n",
"\n",
"* Execute (or Run) the Cell below."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Imports successful!\n"
]
}
],
"source": [
"import os\n",
"from nbutils import add_project_path_to_sys_path\n",
"\n",
"\n",
"# Add project root to sys.path\n",
"add_project_path_to_sys_path()\n",
"\n",
"try:\n",
" import src.hdf5_ops as hdf5_ops\n",
" import pipelines.metadata_revision as metadata_revision\n",
" print(\"Imports successful!\")\n",
"except ImportError as e:\n",
" print(f\"Import error: {e}\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Step 1: Define an HDF5 file\n",
"\n",
"* Set up the string variable `hdf5_file_path` with the path to the HDF5 file of interest.\n",
"* Execute Cell."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"hdf5_file_path = \"../output_files/collection_kinetic_flowtube_study_LuciaI_2022-01-31_2023-06-29/kinetic_flowtube_study_LuciaI_2023-06-29.h5\""
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Step 2: Create a YAML Representation of the File\n",
"\n",
"We now convert the HDF5 file structure and existing metadata into YAML format. This will be used to add and edit metadata attributes.\n",
"\n",
"* Execute Cell."
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"The YAML file representation output_files/collection_kinetic_flowtube_study_LuciaI_2022-01-31_2023-06-29/kinetic_flowtube_study_LuciaI_2023-06-29.json of the HDF5 file output_files/collection_kinetic_flowtube_study_LuciaI_2022-01-31_2023-06-29/kinetic_flowtube_study_LuciaI_2023-06-29.h5 was created successfully.\n"
]
}
],
"source": [
"yaml_file_path = hdf5_ops.serialize_metadata(hdf5_file_path,output_format='json')\n",
"\n",
"if os.path.exists(yaml_file_path):\n",
" print(f'The YAML file representation {yaml_file_path} of the HDF5 file {hdf5_file_path} was created successfully.')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Step 3: Edit and Augment YAML with Metadata\n",
"\n",
"We can now manually edit the YAML file to add metadata.\n",
"* (Optional) automate your metadata annotation process by creating a program that takes the YAML file and returns the modified version of it (an illustrative sketch of such a program follows this notebook).\n",
"* Execute Cell."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def metadata_annotation_process(yaml_file_path):\n",
"\n",
" # Include metadata annotation logic, e.g., load yaml file and modify its content accordingly\n",
"\n",
" print(f'Ensure your edits to {yaml_file_path} have been properly incorporated and saved.')\n",
"\n",
" return yaml_file_path\n",
"\n",
"yaml_file_path = metadata_annotation_process(yaml_file_path)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Step 4: Update the Original File Based on the Edited YAML\n",
"\n",
"Lastly, we will update the original file with the metadata from the YAML file.\n",
"\n",
"* Execute Cell."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"\n",
"metadata_revision.update_hdf5_file_with_review(hdf5_file_path,yaml_file_path)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "multiphase_chemistry_env",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.9"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
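Step 3 of the notebook above leaves metadata_annotation_process as a stub. One possible way to automate it, assuming the serialized file is plain YAML and that PyYAML is available in the environment, is sketched below; the 'project' key is purely illustrative and not part of the project's metadata schema.

import yaml  # PyYAML; assumed to be installed in the environment

def metadata_annotation_process(yaml_file_path):
    # Load the serialized representation, add or edit attributes, and save it back.
    with open(yaml_file_path, 'r') as f:
        metadata = yaml.safe_load(f)

    # Illustrative edit only: attach a hypothetical 'project' attribute at the top level.
    if isinstance(metadata, dict):
        metadata['project'] = 'example-campaign'

    with open(yaml_file_path, 'w') as f:
        yaml.safe_dump(metadata, f)

    return yaml_file_path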
@ -1,16 +1,16 @@
import sys
import os

def add_project_path_to_sys_path():
    """
    Adds the project path (root directory containing the package) to sys.path.
    """
    # Determine the root directory (project_root, which contains 'dima')
    notebook_dir = os.getcwd()  # Current working directory (assumes running from notebooks/)
    project_path = os.path.normpath(os.path.join(notebook_dir, ".."))  # Move up to project root

    if project_path not in sys.path:  # Avoid duplicate entries
        sys.path.append(project_path)

if __name__ == "__main__":
    add_project_path_to_sys_path()