From 976e8c65743acd2d05b08266df35b2bc363149d3 Mon Sep 17 00:00:00 2001 From: Barbara Bertozzi Date: Wed, 18 Sep 2024 16:34:18 +0200 Subject: [PATCH 1/6] gitignore added --- .gitignore | 1 + 1 file changed, 1 insertion(+) create mode 100644 .gitignore diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..c18dd8d --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +__pycache__/ From a42148cc132d45d2b62c59eb221abea43a7df30e Mon Sep 17 00:00:00 2001 From: Barbara Bertozzi Date: Thu, 19 Sep 2024 15:22:41 +0200 Subject: [PATCH 2/6] Added function raw_traces_plot --- SP2XR_toolkit.py | 59 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 59 insertions(+) diff --git a/SP2XR_toolkit.py b/SP2XR_toolkit.py index a4a5b48..d963b28 100644 --- a/SP2XR_toolkit.py +++ b/SP2XR_toolkit.py @@ -22,6 +22,10 @@ from dask import delayed import time +import ipywidgets as widgets +from IPython.display import display, clear_output + + # %% Functions for listing files with specific string in name @@ -2088,3 +2092,58 @@ def resample_to_dt(dir_path_pbp, dt=60, path_parquet='', save_final_data=False): append=False) +#%% Widget to plot raw traces + + +def raw_traces_plot(ch0_plot=[], ch1_plot=[], + add_str_title='', + xmin=None, xmax=None, + ddf_processed=False, verbose=False, + ): + current_event_index = 1 + + def update_plot(event_index): + fig, ax1 = plt.subplots(figsize=(5, 3), layout='constrained') + + if verbose and ddf_processed.any().any(): + ax1.text(0.75, 0.4, 'ch0_flag_000: '+str(ddf_processed.iloc[event_index]['ch0_flag_000']), fontsize=8, transform=ax1.transAxes) + + + ax1.set_title(add_str_title+' , idx: '+str(event_index)+'\n'+str(ch0_plot.iloc[event_index].name)) + ax1t = ax1.twinx() + + if len(ch0_plot)!=0: + ax1.plot(ch0_plot.iloc[event_index], label='ch0', c='C0', ls='-') + if len(ch1_plot)!=0: + ax1t.plot(ch1_plot.iloc[event_index], label='ch1', c='C1', ls='-') + + ax1.set_ylabel('Scattering', color='tab:blue') + ax1.tick_params(axis='y', labelcolor='tab:blue') + ax1.legend(loc=1) + ax1t.legend(loc=4) + + if xmin and xmax is not None: + ax1.set_xlim(xmin, xmax) + + plt.show() + + def handle_forward_button_click(b): + nonlocal current_event_index + current_event_index = min(current_event_index + 1, len(ch0_plot) - 1) + event_slider.value = current_event_index + + def handle_backward_button_click(b): + nonlocal current_event_index + current_event_index = max(current_event_index - 1, 0) + event_slider.value = current_event_index + + event_slider = widgets.IntSlider(min=0, max=len(ch0_plot) - 1, step=1, value=current_event_index, description='Event:') + forward_button = widgets.Button(description='Forward') + backward_button = widgets.Button(description='Backward') + + forward_button.on_click(handle_forward_button_click) + backward_button.on_click(handle_backward_button_click) + + display(widgets.HBox([backward_button, forward_button])) + interactive_plot = widgets.interactive(update_plot, event_index=event_slider) + display(interactive_plot) \ No newline at end of file From a4bbf356afb929ed1097b94582e28d7ae4cd0058 Mon Sep 17 00:00:00 2001 From: Barbara Bertozzi Date: Mon, 23 Sep 2024 12:45:53 +0200 Subject: [PATCH 3/6] Function to read sp2b updated, added fillna(0) before resampling in the function resample_to_dt --- SP2XR_toolkit.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/SP2XR_toolkit.py b/SP2XR_toolkit.py index d963b28..3e19ec8 100644 --- a/SP2XR_toolkit.py +++ b/SP2XR_toolkit.py @@ -296,7 +296,7 @@ def read_csv_files_with_dask(file_path, meta_pbp, meta_hk, target_directory): # %% Functions to read sp2b files - +@delayed def read_sp2b_from_sp2xr_zipped_2(file_path, meta, target_directory): labview_epoch = datetime.datetime(1904, 1, 1) results = [] @@ -582,10 +582,14 @@ def read_and_process_sp2b(matches, target_directory, meta_file): # def name_function(part_idx): # return f"{first_elements[part_idx]}.parquet" + fn = file_path.split('\\')[-1].split('_')[-2] + '_' + file_path.split('\\')[-1].split('_')[-1].split('.')[-2] + def name(part_idx): + return f'{fn}.parquet' + combined_ddf.to_parquet(path=target_directory, engine='pyarrow', partition_on=['date', 'hour'], - name_function=name_function, + name_function=name, write_index=True, append=False, schema='infer') @@ -2069,8 +2073,8 @@ def resample_to_dt(dir_path_pbp, dt=60, path_parquet='', save_final_data=False): cols_for_sum = timelag_hist_cols + cnts_cols + addiotnal_cols cols_for_count = ['temporary_col'] - data_resampled_mean = dd_data[cols_for_mean].resample(f'{dt}s').mean() - data_resampled_sum = dd_data[cols_for_sum].resample(f'{dt}s').sum() + data_resampled_mean = dd_data[cols_for_mean].fillna(0).resample(f'{dt}s').mean() + data_resampled_sum = dd_data[cols_for_sum].fillna(0).resample(f'{dt}s').sum() data_resampled_count = dd_data[cols_for_count].resample(f'{dt}s').count() From 144fc61978729ad4220694e8554046f9f2104531 Mon Sep 17 00:00:00 2001 From: Barbara Bertozzi Date: Mon, 23 Sep 2024 12:54:29 +0200 Subject: [PATCH 4/6] Added function save_image to save multiple figures in a single pdf --- SP2XR_toolkit.py | 27 ++++++++++++++++++++++++--- 1 file changed, 24 insertions(+), 3 deletions(-) diff --git a/SP2XR_toolkit.py b/SP2XR_toolkit.py index 3e19ec8..c4bb5a6 100644 --- a/SP2XR_toolkit.py +++ b/SP2XR_toolkit.py @@ -26,6 +26,7 @@ import ipywidgets as widgets from IPython.display import display, clear_output +from matplotlib.backends.backend_pdf import PdfPages # %% Functions for listing files with specific string in name @@ -1939,7 +1940,7 @@ def process_pbp_parquet(dir_path_pbp, dir_path_hk, ddf_hk['temporary_col'] = 1 ddf_hk_1s = ddf_hk[['Sample Flow Controller Read (sccm)', - 'Sample Flow Controller Read (vccm)']].resample(dt_str).mean() + 'Sample Flow Controller Read (vccm)']].resample(dt_str).mean() # do i need a fill na here? ddf_hk_1s[['original_idx']] = ddf_hk[['temporary_col']].resample(dt_str).count() ddf_hk_1s = ddf_hk_1s[ddf_hk_1s['original_idx'] != 0] @@ -1971,10 +1972,11 @@ def process_pbp_parquet(dir_path_pbp, dir_path_hk, ddf_pbp_hk['S_numConc_within_range_std'] = ddf_pbp_hk['Scatt numb within range'] / (ddf_pbp_hk['Sample Flow Controller Read (sccm)'] * (dt/60)) ddf_pbp_hk['S_numConc_within_range_vol'] = ddf_pbp_hk['Scatt numb within range'] / (ddf_pbp_hk['Sample Flow Controller Read (vccm)'] * (dt/60)) - ddf_pbp['temporary_col'] = 1 + # Calculate histograms of different classifications/flags: + ddf_pbp['temporary_col'] = 1 dNdlogDmev, dMdlogDmev = process_hist_and_dist(ddf_pbp, 'BC mass within range', None, None, inc_mass_bin_lims, inc_mass_bin_ctrs, dt_str, flow=ddf_pbp_hk['Sample Flow Controller Read (vccm)'], rho_eff=rho_eff, BC_type=BC_type) dNdlogDmev_thin, dMdlogDmev_thin = process_hist_and_dist(ddf_pbp, 'BC mass within range', 'cnts_thin', 1, inc_mass_bin_lims, inc_mass_bin_ctrs, dt_str, flow=ddf_pbp_hk['Sample Flow Controller Read (vccm)'], rho_eff=rho_eff, BC_type=BC_type) @@ -2150,4 +2152,23 @@ def raw_traces_plot(ch0_plot=[], ch1_plot=[], display(widgets.HBox([backward_button, forward_button])) interactive_plot = widgets.interactive(update_plot, event_index=event_slider) - display(interactive_plot) \ No newline at end of file + display(interactive_plot) + + +#%% Functions for plots + + +def save_image(filename): + + with PdfPages(filename) as p: + + # get_fignums Return list of existing + # figure numbers + fig_nums = plt.get_fignums() + figs = [plt.figure(n) for n in fig_nums] + + # iterating over the numbers in list + for fig in figs: + + # and saving the files + fig.savefig(p, format='pdf') \ No newline at end of file From 740a8a1d738c4c7f91367a7f42161ab34e99c5e8 Mon Sep 17 00:00:00 2001 From: Barbara Bertozzi Date: Wed, 25 Sep 2024 10:16:18 +0200 Subject: [PATCH 5/6] fixed small bug for particle classified as thin_noScatt --- SP2XR_toolkit.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/SP2XR_toolkit.py b/SP2XR_toolkit.py index c4bb5a6..9682b15 100644 --- a/SP2XR_toolkit.py +++ b/SP2XR_toolkit.py @@ -1680,7 +1680,7 @@ def process_hist_and_dist(df, col, flag_col, flag_value, bin_lims, bin_ctrs, dt_ # Resample and calculate histogram ddf_hist_compact = df_filtered[col].resample(dt_str).agg( - {'result': lambda x: calculate_histogram(x, bin_lims=bin_lims)}) + {'result': lambda x: calculate_histogram(x, bin_lims=bin_lims)}) # I might want to add a .fillna(0) here, this would solve the problem of fillna in the resampling function # Add and filter based on 'original_idx' ddf_hist_compact[['original_idx']] = df_filtered[['temporary_col']].resample(dt_str).count() @@ -1859,7 +1859,7 @@ def process_pbp_parquet(dir_path_pbp, dir_path_hk, ddf_pbp['cnts_thin_low_inc_scatt_ratio'] = 0 ddf_pbp['cnts_particles_for_tl_dist'] = 0 # this flag is used for the calculation of time lag distributions below (it includes the particles classfified as "thin" or "thick") - ddf_pbp.loc[~flag_scatt & flag_inc_in_range_tl_analysis, 'thin_noScatt'] = 1 + ddf_pbp.loc[~flag_scatt & flag_inc_in_range_tl_analysis, 'cnts_thin_noScatt'] = 1 ddf_pbp.loc[flag_scatt & flag_scatt_not_sat & flag_inc_in_range_tl_analysis & flag_timelag_0_50 & ~flag_low_ratio_inc_scatt, 'cnts_thin'] = 1 ddf_pbp.loc[flag_scatt & flag_scatt_not_sat & flag_inc_in_range_tl_analysis & flag_timelag_0_50 & flag_low_ratio_inc_scatt, 'cnts_thin_low_inc_scatt_ratio'] = 1 ddf_pbp.loc[flag_scatt & flag_scatt_not_sat & flag_inc_in_range_tl_analysis & flag_timelag_greater_50 & ~flag_extreme_positive_timelag, 'cnts_thick'] = 1 From 17038600e42546b0f5dc831a865b6c68a7c5ca14 Mon Sep 17 00:00:00 2001 From: Barbara Bertozzi Date: Wed, 25 Sep 2024 10:28:56 +0200 Subject: [PATCH 6/6] Now also for coating-related particle classifications, we consider particles as small as 0.3fg(~70nm) (before I was considering only particles larger than ~100nm) --- SP2XR_toolkit.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/SP2XR_toolkit.py b/SP2XR_toolkit.py index 9682b15..6a27bce 100644 --- a/SP2XR_toolkit.py +++ b/SP2XR_toolkit.py @@ -1859,16 +1859,16 @@ def process_pbp_parquet(dir_path_pbp, dir_path_hk, ddf_pbp['cnts_thin_low_inc_scatt_ratio'] = 0 ddf_pbp['cnts_particles_for_tl_dist'] = 0 # this flag is used for the calculation of time lag distributions below (it includes the particles classfified as "thin" or "thick") - ddf_pbp.loc[~flag_scatt & flag_inc_in_range_tl_analysis, 'cnts_thin_noScatt'] = 1 - ddf_pbp.loc[flag_scatt & flag_scatt_not_sat & flag_inc_in_range_tl_analysis & flag_timelag_0_50 & ~flag_low_ratio_inc_scatt, 'cnts_thin'] = 1 - ddf_pbp.loc[flag_scatt & flag_scatt_not_sat & flag_inc_in_range_tl_analysis & flag_timelag_0_50 & flag_low_ratio_inc_scatt, 'cnts_thin_low_inc_scatt_ratio'] = 1 - ddf_pbp.loc[flag_scatt & flag_scatt_not_sat & flag_inc_in_range_tl_analysis & flag_timelag_greater_50 & ~flag_extreme_positive_timelag, 'cnts_thick'] = 1 - ddf_pbp.loc[flag_scatt & ~flag_scatt_not_sat & flag_inc_in_range_tl_analysis & flag_timelag_greater_50 & ~flag_extreme_positive_timelag, 'cnts_thick_sat'] = 1 - ddf_pbp.loc[flag_scatt & ~flag_scatt_not_sat & flag_inc_in_range_tl_analysis & flag_timelag_0_50, 'cnts_thin_sat'] = 1 - ddf_pbp.loc[flag_scatt & ~flag_scatt_not_sat & flag_inc_in_range_tl_analysis & flag_negative_timelag, 'cnts_ntl_sat'] = 1 - ddf_pbp.loc[flag_scatt & flag_scatt_not_sat & flag_inc_in_range_tl_analysis & flag_negative_timelag, 'cnts_ntl'] = 1 - ddf_pbp.loc[flag_scatt & flag_inc_in_range_tl_analysis & flag_extreme_positive_timelag, 'cnts_extreme_positive_timelag'] = 1 - ddf_pbp.loc[flag_scatt & flag_scatt_not_sat & flag_inc_in_range_tl_analysis & ((flag_timelag_0_50 & ~flag_low_ratio_inc_scatt) | (flag_timelag_greater_50 & ~flag_extreme_positive_timelag)), 'cnts_particles_for_tl_dist'] = 1 + ddf_pbp.loc[~flag_scatt & flag_inc_in_range, 'cnts_thin_noScatt'] = 1 + ddf_pbp.loc[flag_scatt & flag_scatt_not_sat & flag_inc_in_range & flag_timelag_0_50 & ~flag_low_ratio_inc_scatt, 'cnts_thin'] = 1 + ddf_pbp.loc[flag_scatt & flag_scatt_not_sat & flag_inc_in_range & flag_timelag_0_50 & flag_low_ratio_inc_scatt, 'cnts_thin_low_inc_scatt_ratio'] = 1 + ddf_pbp.loc[flag_scatt & flag_scatt_not_sat & flag_inc_in_range & flag_timelag_greater_50 & ~flag_extreme_positive_timelag, 'cnts_thick'] = 1 + ddf_pbp.loc[flag_scatt & ~flag_scatt_not_sat & flag_inc_in_range & flag_timelag_greater_50 & ~flag_extreme_positive_timelag, 'cnts_thick_sat'] = 1 + ddf_pbp.loc[flag_scatt & ~flag_scatt_not_sat & flag_inc_in_range & flag_timelag_0_50, 'cnts_thin_sat'] = 1 + ddf_pbp.loc[flag_scatt & ~flag_scatt_not_sat & flag_inc_in_range & flag_negative_timelag, 'cnts_ntl_sat'] = 1 + ddf_pbp.loc[flag_scatt & flag_scatt_not_sat & flag_inc_in_range & flag_negative_timelag, 'cnts_ntl'] = 1 + ddf_pbp.loc[flag_scatt & flag_inc_in_range & flag_extreme_positive_timelag, 'cnts_extreme_positive_timelag'] = 1 + ddf_pbp.loc[flag_scatt & flag_scatt_not_sat & flag_inc_in_range & ((flag_timelag_0_50 & ~flag_low_ratio_inc_scatt) | (flag_timelag_greater_50 & ~flag_extreme_positive_timelag)), 'cnts_particles_for_tl_dist'] = 1 ddf_pbp['cnts_thin_total'] = ddf_pbp['cnts_thin']