diff --git a/SP2XR_toolkit.py b/SP2XR_toolkit.py index a4a5b48..6a27bce 100644 --- a/SP2XR_toolkit.py +++ b/SP2XR_toolkit.py @@ -22,6 +22,11 @@ from dask import delayed import time +import ipywidgets as widgets +from IPython.display import display, clear_output + + +from matplotlib.backends.backend_pdf import PdfPages # %% Functions for listing files with specific string in name @@ -292,7 +297,7 @@ def read_csv_files_with_dask(file_path, meta_pbp, meta_hk, target_directory): # %% Functions to read sp2b files - +@delayed def read_sp2b_from_sp2xr_zipped_2(file_path, meta, target_directory): labview_epoch = datetime.datetime(1904, 1, 1) results = [] @@ -578,10 +583,14 @@ def read_and_process_sp2b(matches, target_directory, meta_file): # def name_function(part_idx): # return f"{first_elements[part_idx]}.parquet" + fn = file_path.split('\\')[-1].split('_')[-2] + '_' + file_path.split('\\')[-1].split('_')[-1].split('.')[-2] + def name(part_idx): + return f'{fn}.parquet' + combined_ddf.to_parquet(path=target_directory, engine='pyarrow', partition_on=['date', 'hour'], - name_function=name_function, + name_function=name, write_index=True, append=False, schema='infer') @@ -1671,7 +1680,7 @@ def process_hist_and_dist(df, col, flag_col, flag_value, bin_lims, bin_ctrs, dt_ # Resample and calculate histogram ddf_hist_compact = df_filtered[col].resample(dt_str).agg( - {'result': lambda x: calculate_histogram(x, bin_lims=bin_lims)}) + {'result': lambda x: calculate_histogram(x, bin_lims=bin_lims)}) # I might want to add a .fillna(0) here, this would solve the problem of fillna in the resampling function # Add and filter based on 'original_idx' ddf_hist_compact[['original_idx']] = df_filtered[['temporary_col']].resample(dt_str).count() @@ -1850,16 +1859,16 @@ def process_pbp_parquet(dir_path_pbp, dir_path_hk, ddf_pbp['cnts_thin_low_inc_scatt_ratio'] = 0 ddf_pbp['cnts_particles_for_tl_dist'] = 0 # this flag is used for the calculation of time lag distributions below (it includes the 
particles classfified as "thin" or "thick") - ddf_pbp.loc[~flag_scatt & flag_inc_in_range_tl_analysis, 'thin_noScatt'] = 1 - ddf_pbp.loc[flag_scatt & flag_scatt_not_sat & flag_inc_in_range_tl_analysis & flag_timelag_0_50 & ~flag_low_ratio_inc_scatt, 'cnts_thin'] = 1 - ddf_pbp.loc[flag_scatt & flag_scatt_not_sat & flag_inc_in_range_tl_analysis & flag_timelag_0_50 & flag_low_ratio_inc_scatt, 'cnts_thin_low_inc_scatt_ratio'] = 1 - ddf_pbp.loc[flag_scatt & flag_scatt_not_sat & flag_inc_in_range_tl_analysis & flag_timelag_greater_50 & ~flag_extreme_positive_timelag, 'cnts_thick'] = 1 - ddf_pbp.loc[flag_scatt & ~flag_scatt_not_sat & flag_inc_in_range_tl_analysis & flag_timelag_greater_50 & ~flag_extreme_positive_timelag, 'cnts_thick_sat'] = 1 - ddf_pbp.loc[flag_scatt & ~flag_scatt_not_sat & flag_inc_in_range_tl_analysis & flag_timelag_0_50, 'cnts_thin_sat'] = 1 - ddf_pbp.loc[flag_scatt & ~flag_scatt_not_sat & flag_inc_in_range_tl_analysis & flag_negative_timelag, 'cnts_ntl_sat'] = 1 - ddf_pbp.loc[flag_scatt & flag_scatt_not_sat & flag_inc_in_range_tl_analysis & flag_negative_timelag, 'cnts_ntl'] = 1 - ddf_pbp.loc[flag_scatt & flag_inc_in_range_tl_analysis & flag_extreme_positive_timelag, 'cnts_extreme_positive_timelag'] = 1 - ddf_pbp.loc[flag_scatt & flag_scatt_not_sat & flag_inc_in_range_tl_analysis & ((flag_timelag_0_50 & ~flag_low_ratio_inc_scatt) | (flag_timelag_greater_50 & ~flag_extreme_positive_timelag)), 'cnts_particles_for_tl_dist'] = 1 + ddf_pbp.loc[~flag_scatt & flag_inc_in_range, 'cnts_thin_noScatt'] = 1 + ddf_pbp.loc[flag_scatt & flag_scatt_not_sat & flag_inc_in_range & flag_timelag_0_50 & ~flag_low_ratio_inc_scatt, 'cnts_thin'] = 1 + ddf_pbp.loc[flag_scatt & flag_scatt_not_sat & flag_inc_in_range & flag_timelag_0_50 & flag_low_ratio_inc_scatt, 'cnts_thin_low_inc_scatt_ratio'] = 1 + ddf_pbp.loc[flag_scatt & flag_scatt_not_sat & flag_inc_in_range & flag_timelag_greater_50 & ~flag_extreme_positive_timelag, 'cnts_thick'] = 1 + ddf_pbp.loc[flag_scatt & 
~flag_scatt_not_sat & flag_inc_in_range & flag_timelag_greater_50 & ~flag_extreme_positive_timelag, 'cnts_thick_sat'] = 1 + ddf_pbp.loc[flag_scatt & ~flag_scatt_not_sat & flag_inc_in_range & flag_timelag_0_50, 'cnts_thin_sat'] = 1 + ddf_pbp.loc[flag_scatt & ~flag_scatt_not_sat & flag_inc_in_range & flag_negative_timelag, 'cnts_ntl_sat'] = 1 + ddf_pbp.loc[flag_scatt & flag_scatt_not_sat & flag_inc_in_range & flag_negative_timelag, 'cnts_ntl'] = 1 + ddf_pbp.loc[flag_scatt & flag_inc_in_range & flag_extreme_positive_timelag, 'cnts_extreme_positive_timelag'] = 1 + ddf_pbp.loc[flag_scatt & flag_scatt_not_sat & flag_inc_in_range & ((flag_timelag_0_50 & ~flag_low_ratio_inc_scatt) | (flag_timelag_greater_50 & ~flag_extreme_positive_timelag)), 'cnts_particles_for_tl_dist'] = 1 ddf_pbp['cnts_thin_total'] = ddf_pbp['cnts_thin'] @@ -1931,7 +1940,7 @@ def process_pbp_parquet(dir_path_pbp, dir_path_hk, ddf_hk['temporary_col'] = 1 ddf_hk_1s = ddf_hk[['Sample Flow Controller Read (sccm)', - 'Sample Flow Controller Read (vccm)']].resample(dt_str).mean() + 'Sample Flow Controller Read (vccm)']].resample(dt_str).mean() # do i need a fill na here? 
#%% Widget to plot raw traces


def raw_traces_plot(ch0_plot=None, ch1_plot=None,
                    add_str_title='',
                    xmin=None, xmax=None,
                    ddf_processed=False, verbose=False,
                    ):
    """Display an interactive browser for raw per-event traces.

    A slider plus "Backward"/"Forward" buttons select one event; the
    selected row of each trace table is drawn on a shared x axis, with
    ``ch0_plot`` on the left y axis and ``ch1_plot`` on a twin right y axis.

    Parameters
    ----------
    ch0_plot, ch1_plot : table-like, optional
        One event per row; rows are accessed with ``.iloc`` and counted
        with ``len``.  NOTE(review): presumably pandas DataFrames -- confirm
        against callers.  At least one of the two must be non-empty.
    add_str_title : str
        Text placed at the start of the plot title.
    xmin, xmax : number, optional
        x-axis limits.  They are applied only when both are given.
    ddf_processed : table-like or False
        Optional table holding a ``'ch0_flag_000'`` column, indexed by
        event position.  Only read when ``verbose`` is true.
    verbose : bool
        When true and ``ddf_processed`` contains at least one truthy value,
        the ``ch0_flag_000`` value of the current event is printed on the
        axes.

    Raises
    ------
    ValueError
        If both ``ch0_plot`` and ``ch1_plot`` are missing or empty.
    """
    # None replaces the former mutable [] defaults; an explicit [] still works.
    ch0_plot = [] if ch0_plot is None else ch0_plot
    ch1_plot = [] if ch1_plot is None else ch1_plot

    # Use len() rather than truthiness: bool(DataFrame) raises ValueError.
    has_ch0 = len(ch0_plot) != 0
    has_ch1 = len(ch1_plot) != 0
    if not (has_ch0 or has_ch1):
        raise ValueError(
            'raw_traces_plot needs at least one non-empty trace table '
            '(ch0_plot or ch1_plot)'
        )

    # ch0 drives the slider range and the title whenever it is available,
    # exactly as before; ch1 is only the fallback when ch0 is absent.
    reference = ch0_plot if has_ch0 else ch1_plot
    last_index = len(reference) - 1

    # The default ddf_processed=False has no .any(); only query real tables.
    show_flag = bool(
        verbose
        and hasattr(ddf_processed, 'iloc')
        and ddf_processed.any().any()
    )

    def update_plot(event_index):
        _fig, ax1 = plt.subplots(figsize=(5, 3), layout='constrained')

        if show_flag:
            flag_value = ddf_processed.iloc[event_index]['ch0_flag_000']
            ax1.text(0.75, 0.4, 'ch0_flag_000: ' + str(flag_value),
                     fontsize=8, transform=ax1.transAxes)

        event_name = reference.iloc[event_index].name
        ax1.set_title(f'{add_str_title} , idx: {event_index}\n{event_name}')
        ax1t = ax1.twinx()

        # Legends are only requested for axes that actually received a line,
        # which avoids matplotlib's "no artists with labels" warning.
        if has_ch0:
            ax1.plot(ch0_plot.iloc[event_index], label='ch0', c='C0', ls='-')
            ax1.legend(loc=1)
        if has_ch1 and event_index < len(ch1_plot):
            ax1t.plot(ch1_plot.iloc[event_index], label='ch1', c='C1', ls='-')
            ax1t.legend(loc=4)

        ax1.set_ylabel('Scattering', color='tab:blue')
        ax1.tick_params(axis='y', labelcolor='tab:blue')

        # Explicit None checks so that a limit of 0 is honoured.
        if xmin is not None and xmax is not None:
            ax1.set_xlim(xmin, xmax)

        plt.show()

    # The slider is the single source of truth for the current event, so the
    # buttons stay in sync even after the slider has been dragged by hand.
    def handle_forward_button_click(b):
        event_slider.value = min(event_slider.value + 1, last_index)

    def handle_backward_button_click(b):
        event_slider.value = max(event_slider.value - 1, 0)

    event_slider = widgets.IntSlider(
        min=0,
        max=last_index,
        step=1,
        value=min(1, last_index),  # start at event 1 when it exists
        description='Event:',
    )
    forward_button = widgets.Button(description='Forward')
    backward_button = widgets.Button(description='Backward')

    forward_button.on_click(handle_forward_button_click)
    backward_button.on_click(handle_backward_button_click)

    display(widgets.HBox([backward_button, forward_button]))
    interactive_plot = widgets.interactive(update_plot, event_index=event_slider)
    display(interactive_plot)


#%% Functions for plots


def save_image(filename):
    """Save every currently open matplotlib figure into one PDF file.

    Each figure number reported by ``plt.get_fignums()`` becomes one page
    of ``filename``, in the order the numbers are returned.
    """
    with PdfPages(filename) as pdf:
        for fig_number in plt.get_fignums():
            # plt.figure(n) returns the existing figure with that number.
            plt.figure(fig_number).savefig(pdf, format='pdf')