Cherry-picked SP2XR_toolkit.py from commit 1703860 from branch sp2xr_nya_code

This commit is contained in:
2024-10-17 09:41:12 +02:00
parent f060cfbe8d
commit 454ef54ade

View File

@ -22,6 +22,11 @@ from dask import delayed
import time
import ipywidgets as widgets
from IPython.display import display, clear_output
from matplotlib.backends.backend_pdf import PdfPages
# %% Functions for listing files with specific string in name
@ -292,7 +297,7 @@ def read_csv_files_with_dask(file_path, meta_pbp, meta_hk, target_directory):
# %% Functions to read sp2b files
@delayed
def read_sp2b_from_sp2xr_zipped_2(file_path, meta, target_directory):
labview_epoch = datetime.datetime(1904, 1, 1)
results = []
@ -578,10 +583,14 @@ def read_and_process_sp2b(matches, target_directory, meta_file):
# def name_function(part_idx):
# return f"{first_elements[part_idx]}.parquet"
fn = file_path.split('\\')[-1].split('_')[-2] + '_' + file_path.split('\\')[-1].split('_')[-1].split('.')[-2]
def name(part_idx):
return f'{fn}.parquet'
combined_ddf.to_parquet(path=target_directory,
engine='pyarrow',
partition_on=['date', 'hour'],
name_function=name_function,
name_function=name,
write_index=True,
append=False, schema='infer')
@ -1671,7 +1680,7 @@ def process_hist_and_dist(df, col, flag_col, flag_value, bin_lims, bin_ctrs, dt_
# Resample and calculate histogram
ddf_hist_compact = df_filtered[col].resample(dt_str).agg(
{'result': lambda x: calculate_histogram(x, bin_lims=bin_lims)})
{'result': lambda x: calculate_histogram(x, bin_lims=bin_lims)}) # I might want to add a .fillna(0) here, this would solve the problem of fillna in the resampling function
# Add and filter based on 'original_idx'
ddf_hist_compact[['original_idx']] = df_filtered[['temporary_col']].resample(dt_str).count()
@ -1850,16 +1859,16 @@ def process_pbp_parquet(dir_path_pbp, dir_path_hk,
ddf_pbp['cnts_thin_low_inc_scatt_ratio'] = 0
ddf_pbp['cnts_particles_for_tl_dist'] = 0 # this flag is used for the calculation of time lag distributions below (it includes the particles classfified as "thin" or "thick")
ddf_pbp.loc[~flag_scatt & flag_inc_in_range_tl_analysis, 'thin_noScatt'] = 1
ddf_pbp.loc[flag_scatt & flag_scatt_not_sat & flag_inc_in_range_tl_analysis & flag_timelag_0_50 & ~flag_low_ratio_inc_scatt, 'cnts_thin'] = 1
ddf_pbp.loc[flag_scatt & flag_scatt_not_sat & flag_inc_in_range_tl_analysis & flag_timelag_0_50 & flag_low_ratio_inc_scatt, 'cnts_thin_low_inc_scatt_ratio'] = 1
ddf_pbp.loc[flag_scatt & flag_scatt_not_sat & flag_inc_in_range_tl_analysis & flag_timelag_greater_50 & ~flag_extreme_positive_timelag, 'cnts_thick'] = 1
ddf_pbp.loc[flag_scatt & ~flag_scatt_not_sat & flag_inc_in_range_tl_analysis & flag_timelag_greater_50 & ~flag_extreme_positive_timelag, 'cnts_thick_sat'] = 1
ddf_pbp.loc[flag_scatt & ~flag_scatt_not_sat & flag_inc_in_range_tl_analysis & flag_timelag_0_50, 'cnts_thin_sat'] = 1
ddf_pbp.loc[flag_scatt & ~flag_scatt_not_sat & flag_inc_in_range_tl_analysis & flag_negative_timelag, 'cnts_ntl_sat'] = 1
ddf_pbp.loc[flag_scatt & flag_scatt_not_sat & flag_inc_in_range_tl_analysis & flag_negative_timelag, 'cnts_ntl'] = 1
ddf_pbp.loc[flag_scatt & flag_inc_in_range_tl_analysis & flag_extreme_positive_timelag, 'cnts_extreme_positive_timelag'] = 1
ddf_pbp.loc[flag_scatt & flag_scatt_not_sat & flag_inc_in_range_tl_analysis & ((flag_timelag_0_50 & ~flag_low_ratio_inc_scatt) | (flag_timelag_greater_50 & ~flag_extreme_positive_timelag)), 'cnts_particles_for_tl_dist'] = 1
ddf_pbp.loc[~flag_scatt & flag_inc_in_range, 'cnts_thin_noScatt'] = 1
ddf_pbp.loc[flag_scatt & flag_scatt_not_sat & flag_inc_in_range & flag_timelag_0_50 & ~flag_low_ratio_inc_scatt, 'cnts_thin'] = 1
ddf_pbp.loc[flag_scatt & flag_scatt_not_sat & flag_inc_in_range & flag_timelag_0_50 & flag_low_ratio_inc_scatt, 'cnts_thin_low_inc_scatt_ratio'] = 1
ddf_pbp.loc[flag_scatt & flag_scatt_not_sat & flag_inc_in_range & flag_timelag_greater_50 & ~flag_extreme_positive_timelag, 'cnts_thick'] = 1
ddf_pbp.loc[flag_scatt & ~flag_scatt_not_sat & flag_inc_in_range & flag_timelag_greater_50 & ~flag_extreme_positive_timelag, 'cnts_thick_sat'] = 1
ddf_pbp.loc[flag_scatt & ~flag_scatt_not_sat & flag_inc_in_range & flag_timelag_0_50, 'cnts_thin_sat'] = 1
ddf_pbp.loc[flag_scatt & ~flag_scatt_not_sat & flag_inc_in_range & flag_negative_timelag, 'cnts_ntl_sat'] = 1
ddf_pbp.loc[flag_scatt & flag_scatt_not_sat & flag_inc_in_range & flag_negative_timelag, 'cnts_ntl'] = 1
ddf_pbp.loc[flag_scatt & flag_inc_in_range & flag_extreme_positive_timelag, 'cnts_extreme_positive_timelag'] = 1
ddf_pbp.loc[flag_scatt & flag_scatt_not_sat & flag_inc_in_range & ((flag_timelag_0_50 & ~flag_low_ratio_inc_scatt) | (flag_timelag_greater_50 & ~flag_extreme_positive_timelag)), 'cnts_particles_for_tl_dist'] = 1
ddf_pbp['cnts_thin_total'] = ddf_pbp['cnts_thin']
@ -1931,7 +1940,7 @@ def process_pbp_parquet(dir_path_pbp, dir_path_hk,
ddf_hk['temporary_col'] = 1
ddf_hk_1s = ddf_hk[['Sample Flow Controller Read (sccm)',
'Sample Flow Controller Read (vccm)']].resample(dt_str).mean()
'Sample Flow Controller Read (vccm)']].resample(dt_str).mean() # do i need a fill na here?
ddf_hk_1s[['original_idx']] = ddf_hk[['temporary_col']].resample(dt_str).count()
ddf_hk_1s = ddf_hk_1s[ddf_hk_1s['original_idx'] != 0]
@ -1963,10 +1972,11 @@ def process_pbp_parquet(dir_path_pbp, dir_path_hk,
ddf_pbp_hk['S_numConc_within_range_std'] = ddf_pbp_hk['Scatt numb within range'] / (ddf_pbp_hk['Sample Flow Controller Read (sccm)'] * (dt/60))
ddf_pbp_hk['S_numConc_within_range_vol'] = ddf_pbp_hk['Scatt numb within range'] / (ddf_pbp_hk['Sample Flow Controller Read (vccm)'] * (dt/60))
ddf_pbp['temporary_col'] = 1
# Calculate histograms of different classifications/flags:
ddf_pbp['temporary_col'] = 1
dNdlogDmev, dMdlogDmev = process_hist_and_dist(ddf_pbp, 'BC mass within range', None, None, inc_mass_bin_lims, inc_mass_bin_ctrs, dt_str, flow=ddf_pbp_hk['Sample Flow Controller Read (vccm)'], rho_eff=rho_eff, BC_type=BC_type)
dNdlogDmev_thin, dMdlogDmev_thin = process_hist_and_dist(ddf_pbp, 'BC mass within range', 'cnts_thin', 1, inc_mass_bin_lims, inc_mass_bin_ctrs, dt_str, flow=ddf_pbp_hk['Sample Flow Controller Read (vccm)'], rho_eff=rho_eff, BC_type=BC_type)
@ -2065,8 +2075,8 @@ def resample_to_dt(dir_path_pbp, dt=60, path_parquet='', save_final_data=False):
cols_for_sum = timelag_hist_cols + cnts_cols + addiotnal_cols
cols_for_count = ['temporary_col']
data_resampled_mean = dd_data[cols_for_mean].resample(f'{dt}s').mean()
data_resampled_sum = dd_data[cols_for_sum].resample(f'{dt}s').sum()
data_resampled_mean = dd_data[cols_for_mean].fillna(0).resample(f'{dt}s').mean()
data_resampled_sum = dd_data[cols_for_sum].fillna(0).resample(f'{dt}s').sum()
data_resampled_count = dd_data[cols_for_count].resample(f'{dt}s').count()
@ -2088,3 +2098,77 @@ def resample_to_dt(dir_path_pbp, dt=60, path_parquet='', save_final_data=False):
append=False)
#%% Widget to plot raw traces
def raw_traces_plot(ch0_plot=None, ch1_plot=None,
                    add_str_title='',
                    xmin=None, xmax=None,
                    ddf_processed=False, verbose=False,
                    ):
    """Show an interactive viewer that steps through raw traces event by event.

    A slider plus "Backward"/"Forward" buttons select the event index; the
    selected row of each channel is drawn on a shared x axis with two y axes
    (ch0 on the left, ch1 on the right).

    Parameters
    ----------
    ch0_plot, ch1_plot : optional
        Trace containers that support ``len()`` and ``.iloc[index]``
        (presumably pandas DataFrames with one trace per row -- confirm
        against callers). ``None`` or an empty container omits that channel.
        At least one channel must be non-empty.
    add_str_title : str
        Text prepended to the plot title.
    xmin, xmax : optional
        X-axis limits. They are applied only when both are given; ``0`` is a
        valid limit.
    ddf_processed : optional
        Object supporting ``.any().any()`` and ``.iloc[index]`` that holds a
        ``'ch0_flag_000'`` entry per event. It is only read when ``verbose``
        is true; the default ``False`` means "not provided".
    verbose : bool
        When true and ``ddf_processed`` is provided, annotate the plot with
        the event's ``ch0_flag_000`` value.

    Raises
    ------
    ValueError
        If both channels are missing or empty, because there is then no
        event to display.
    """
    # None replaces the former mutable ``[]`` defaults; an omitted channel is
    # still handled as an empty one.
    ch0_plot = [] if ch0_plot is None else ch0_plot
    ch1_plot = [] if ch1_plot is None else ch1_plot
    has_ch0 = len(ch0_plot) != 0
    has_ch1 = len(ch1_plot) != 0
    if not (has_ch0 or has_ch1):
        raise ValueError('raw_traces_plot needs at least one non-empty '
                         'channel (ch0_plot or ch1_plot)')

    # ch0 drives the event count and the title whenever it is available, as
    # before; ch1 is only the fallback when ch0 is absent.
    reference = ch0_plot if has_ch0 else ch1_plot
    last_index = len(reference) - 1

    # Guard against the default ``ddf_processed=False``, which has no
    # ``.any()`` and would otherwise fail as soon as ``verbose`` is true.
    show_flag = bool(verbose
                     and hasattr(ddf_processed, 'iloc')
                     and ddf_processed.any().any())

    def update_plot(event_index):
        _, ax1 = plt.subplots(figsize=(5, 3), layout='constrained')
        if show_flag:
            ax1.text(0.75, 0.4,
                     'ch0_flag_000: ' + str(ddf_processed.iloc[event_index]['ch0_flag_000']),
                     fontsize=8, transform=ax1.transAxes)
        ax1.set_title(add_str_title + ' , idx: ' + str(event_index) + '\n'
                      + str(reference.iloc[event_index].name))
        ax1t = ax1.twinx()
        # Legends are only requested for axes that actually hold a line, so
        # matplotlib does not warn about missing labelled artists.
        if has_ch0:
            ax1.plot(ch0_plot.iloc[event_index], label='ch0', c='C0', ls='-')
            ax1.legend(loc=1)
        if has_ch1:
            ax1t.plot(ch1_plot.iloc[event_index], label='ch1', c='C1', ls='-')
            ax1t.legend(loc=4)
        ax1.set_ylabel('Scattering', color='tab:blue')
        ax1.tick_params(axis='y', labelcolor='tab:blue')
        # Explicit None checks: a plain truthiness test would drop xmin=0.
        if xmin is not None and xmax is not None:
            ax1.set_xlim(xmin, xmax)
        plt.show()

    # The slider is the single source of truth for the current event, so the
    # buttons stay in sync even after the slider has been dragged by hand.
    def handle_forward_button_click(b):
        event_slider.value = min(event_slider.value + 1, last_index)

    def handle_backward_button_click(b):
        event_slider.value = max(event_slider.value - 1, 0)

    # The viewer starts on index 1, or on 0 when there is only one event.
    event_slider = widgets.IntSlider(min=0, max=last_index, step=1,
                                     value=min(1, last_index),
                                     description='Event:')
    forward_button = widgets.Button(description='Forward')
    backward_button = widgets.Button(description='Backward')

    forward_button.on_click(handle_forward_button_click)
    backward_button.on_click(handle_backward_button_click)

    display(widgets.HBox([backward_button, forward_button]))
    interactive_plot = widgets.interactive(update_plot, event_index=event_slider)
    display(interactive_plot)
#%% Functions for plots
def save_image(filename):
    """Write every currently open matplotlib figure to one multi-page PDF.

    Parameters
    ----------
    filename
        Destination passed straight to ``PdfPages``.

    Each open figure becomes one page, in the order ``plt.get_fignums()``
    reports them. Looking a figure up with ``plt.figure(number)`` also makes
    it the current figure, so the last figure is current on return.
    """
    with PdfPages(filename) as pdf:
        for number in plt.get_fignums():
            # Fetch the existing figure by number and append it as a page.
            plt.figure(number).savefig(pdf, format='pdf')