Initial commit. hdf5_lib.py contains functions to read an HDF5 file as a dataframe, and napp_plotlib.py contains functions to plot images and spectra. A Jupyter notebook is included to illustrate the functions.

2023-08-22 14:28:27 +02:00
parent 8c4851ffea
commit 3e9a26f147
5 changed files with 235 additions and 0 deletions
--- a/.gitignore
+++ b/.gitignore
@ -0,0 +1,2 @@
+*.pyc
+__pycache__/
--- a/FileList.h5
+++ b/FileList.h5
--- a/demo_hdf5_data_sharing_and_plotting.ipynb
+++ b/demo_hdf5_data_sharing_and_plotting.ipynb
--- a/hdf5_lib.py
+++ b/hdf5_lib.py
@ -0,0 +1,52 @@
+import pandas as pd
+import h5py
+#import os
+import sys
+
+filename = 'FileList.h5'  # module-level HDF5 file name; read_hdf5_as_dataframe's own `filename` parameter shadows it
+
+def read_hdf5_as_dataframe(filename):
+
+    with h5py.File(filename,'r') as file:
+     
+        # Define group's attributes and datasets. This should hold
+        # for all groups. TODO: implement verification and noncompliance error if needed.
+        group_list = list(file.keys())
+        group_attrs = list(file[group_list[0]].attrs.keys())
+        # 
+        column_attr_names = [item[item.find('_')+1::] for item in group_attrs]
+        column_attr_names_idx = [int(item[4:(item.find('_'))]) for item in group_attrs]
+    
+        group_datasets = list(file[group_list[0]].keys())
+        #
+        column_dataset_names = [file[group_list[0]][item].attrs['column_name'] for item in group_datasets]
+        column_dataset_names_idx = [int(item[2:]) for item in group_datasets]
+
+
+        # Define data_frame as group_attrs + group_datasets
+        #pd_series_index = group_attrs + group_datasets
+        pd_series_index = column_attr_names + column_dataset_names
+
+        output_dataframe = pd.DataFrame(columns=pd_series_index,index=group_list)
+
+        for group_key in group_list:        
+            # Print group_name
+            #print(group_key)
+            tmp_row = []
+            for attr_key in group_attrs:
+                #print(type(file[group_key].attrs[attr_key]))
+                tmp_row.append(file[group_key].attrs[attr_key])
+            for ds_key in group_datasets:
+                # Check dataset's type by uncommenting the line below
+                # print(type(file[group_key][ds_key][()]))
+
+                # Append to list the value of the file at dataset /group/ds
+                tmp_row.append(file[group_key][ds_key][()])
+
+            # Create pandas Series/measurement
+            row = pd.Series(data=tmp_row,index=pd_series_index, name = group_key)
+            output_dataframe.loc[group_key,:] = row
+
+    return output_dataframe
+
+
--- a/napp_plotlib.py
+++ b/napp_plotlib.py
@ -0,0 +1,45 @@
+import pandas as pd
+import numpy as np
+import matplotlib.pyplot as plt
+
+def plot_image(dataframe,filter):
+
+    for meas_idx in dataframe.loc[filter,:].index:
+        meas = dataframe.loc[meas_idx,:] # pandas Series    
+        fig = plt.figure()
+        ax = plt.gca()
+        rows, cols = meas['image'].shape
+        scientaEkin_eV = meas['scientaEkin_eV'].flatten()
+        x_min, x_max = np.min(scientaEkin_eV), np.max(scientaEkin_eV)
+        y_min, y_max = 0, rows
+        ax.imshow(meas['image'],extent = [x_min,x_max,y_min,y_max])
+        ax.set_xlabel('scientaEkin_eV')
+        ax.set_ylabel('Replicates')
+        ax.set_title(meas['name'][0]+ '\n'+meas['sample'][0]+ '\n' + meas['lastModifiedDatestr'][0])
+
+def plot_spectrum(dataframe,filter,):
+
+    fig = plt.figure()
+    ax = plt.gca()
+
+    for meas_idx in dataframe.loc[filter,:].index:
+        meas = dataframe.loc[meas_idx,:] # pandas Series   
+
+        rows, cols = meas['image'].shape
+        bindingEnergy_eV = meas['bindingEnergy_eV'].flatten()
+        spectrum_countsPerSecond = meas['spectrum_countsPerSecond'].flatten()
+        x_min, x_max = np.min(bindingEnergy_eV), np.max(bindingEnergy_eV)
+        y_min, y_max = 0, rows
+        #for i in range(cols):
+        ax.plot(bindingEnergy_eV, spectrum_countsPerSecond,label = meas['name'][0])
+        #ax.plot(bindingEnergy_eV, np.mean(meas['image'],axis=0))
+        ax.set_xlabel('bindingEnergy_eV')
+        ax.set_ylabel('counts Per Second')
+        if len(meas)>1:
+            ax.set_title('\n'+meas['sample'][0]+ '\n' + meas['lastModifiedDatestr'][0])
+        else:
+            ax.set_title(meas['name'][0] + '\n'+meas['sample'][0]+ '\n' + meas['lastModifiedDatestr'][0])
+    ax.legend()
+
+
+