Functions updated
This commit is contained in:
@ -75,7 +75,7 @@ def find_files(directory, string, avoid='xxxxxxxxxx'):
|
||||
|
||||
return filtered_paths
|
||||
|
||||
def get_file_dict(directory, file_type):
|
||||
def get_file_dict(directory, file_type, level='hour'):
|
||||
"""
|
||||
Creates a dictionary with date and hour as keys and file paths as values
|
||||
for the given directory.
|
||||
@ -87,7 +87,10 @@ def get_file_dict(directory, file_type):
|
||||
parts = file_path.split(os.sep)
|
||||
date = parts[-3]
|
||||
hour = parts[-2]
|
||||
file_dict[(date, hour)] = os.path.join('/', *parts[:-1])
|
||||
if level == 'hour':
|
||||
file_dict[(date, hour)] = os.path.join('/', *parts[:-1])
|
||||
elif level == 'date':
|
||||
file_dict[(date)] = os.path.join('/', *parts[:-2])
|
||||
return file_dict
|
||||
|
||||
|
||||
@ -1508,7 +1511,10 @@ def calculate_calib_coeff(pbp_data, calib_dict,
|
||||
|
||||
if do_peak_histogram_plots:
|
||||
fig, axs = plt.subplots()
|
||||
axs.set_title(f'file:{folder_name} - mass={mass} fg - aerosol:{aerosol_type}')
|
||||
if size_selection_method == 'APM':
|
||||
axs.set_title(f'file:{folder_name} - mass={mass} fg - aerosol:{aerosol_type}')
|
||||
elif size_selection_method == 'DMA':
|
||||
axs.set_title(f'file:{folder_name} - diam={diam} nm - aerosol:{aerosol_type}')
|
||||
sns.histplot(tmp_pbp, stat='frequency', log_scale=(True, False), bins=nbins, color='grey', alpha=0.5)
|
||||
|
||||
axst = axs.twinx()
|
||||
@ -1520,7 +1526,10 @@ def calculate_calib_coeff(pbp_data, calib_dict,
|
||||
axs.axvline(pu[i], c='g', lw=2)
|
||||
axst.set_ylim(0, )
|
||||
if save_peak_histogram_plots:
|
||||
plt.savefig(plot_dir+f'\\hist_plot_mass_{mass}.png', dpi=600)
|
||||
if size_selection_method == 'APM':
|
||||
plt.savefig(plot_dir+f'/hist_plot_mass_{mass}.png', dpi=600)
|
||||
elif size_selection_method == 'DMA':
|
||||
plt.savefig(plot_dir+f'/hist_plot_diam_{diam}.png', dpi=600)
|
||||
tmp_peak_height_fit = np.concatenate( tmp_peak_height_fit, axis=0 )
|
||||
if len(tmp_mass_fit)>0:
|
||||
tmp_mass_or_diam_fit = np.concatenate( tmp_mass_fit, axis=0 )
|
||||
@ -1560,7 +1569,7 @@ def calculate_calib_coeff(pbp_data, calib_dict,
|
||||
axs.plot(bin_centers, calib_curve_fit, lw=2, c='C0')
|
||||
|
||||
if save_calib_curve_plot:
|
||||
plt.savefig(plot_dir+'\\mass_peakH.png', dpi=600)
|
||||
plt.savefig(plot_dir+'mass_peakH.png', dpi=600)
|
||||
|
||||
return calib_dict, popt
|
||||
|
||||
@ -1753,7 +1762,7 @@ def process_pbp_parquet(dir_path_pbp, dir_path_hk,
|
||||
minM=None, maxM=None, n_incbins=None,
|
||||
minOptD=None, maxOptD=None, n_scattbins=None,
|
||||
minTL=None, maxTL=None, n_timelag=None,
|
||||
save_final_data=True, path_parquet=''
|
||||
save_final_data=True, path_parquet='', partition_on=['date', 'hour']
|
||||
):
|
||||
|
||||
read_dir_pbp = dir_path_pbp
|
||||
@ -1828,9 +1837,9 @@ def process_pbp_parquet(dir_path_pbp, dir_path_hk,
|
||||
flag_scatt_in_range = flag_scatt & (ddf_pbp['Opt diam'] >= minOptD) & (ddf_pbp['Opt diam'] <= maxOptD)
|
||||
|
||||
|
||||
flag_negative_timelag = ddf_pbp['time_lag_new']<0
|
||||
flag_negative_timelag = ddf_pbp['time_lag_new']<-10
|
||||
flag_extreme_positive_timelag = ddf_pbp['time_lag_new']>=400
|
||||
flag_timelag_0_50 = (ddf_pbp['time_lag_new']<50) & (ddf_pbp['time_lag_new']>=0)
|
||||
flag_timelag_0_50 = (ddf_pbp['time_lag_new']<50) & (ddf_pbp['time_lag_new']>=-10)
|
||||
flag_timelag_greater_50 = (ddf_pbp['time_lag_new']>=50) & (ddf_pbp['time_lag_new']<400)
|
||||
|
||||
ddf_pbp['ratio_inc_scatt'] = np.log10(ddf_pbp['Incand relPeak']) / np.log10(ddf_pbp['Scatter relPeak'])
|
||||
@ -1871,9 +1880,9 @@ def process_pbp_parquet(dir_path_pbp, dir_path_hk,
|
||||
ddf_pbp.loc[flag_scatt & flag_scatt_not_sat & flag_inc_in_range & ((flag_timelag_0_50 & ~flag_low_ratio_inc_scatt) | (flag_timelag_greater_50 & ~flag_extreme_positive_timelag)), 'cnts_particles_for_tl_dist'] = 1
|
||||
|
||||
|
||||
ddf_pbp['cnts_thin_total'] = ddf_pbp['cnts_thin']
|
||||
ddf_pbp['cnts_thick_total'] = ddf_pbp['cnts_thick'] + ddf_pbp['cnts_thick_sat'] + ddf_pbp['cnts_ntl_sat'] + ddf_pbp['cnts_ntl']
|
||||
ddf_pbp['cnts_unclassified'] = ddf_pbp['cnts_thin_noScatt'] + ddf_pbp['cnts_extreme_positive_timelag'] + ddf_pbp['cnts_thin_low_inc_scatt_ratio'] + ddf_pbp['cnts_thin_sat']
|
||||
ddf_pbp['cnts_thin_total'] = ddf_pbp['cnts_thin'] + ddf_pbp['cnts_thin_noScatt']
|
||||
ddf_pbp['cnts_thick_total'] = ddf_pbp['cnts_thick'] + ddf_pbp['cnts_thick_sat'] + ddf_pbp['cnts_ntl_sat'] + ddf_pbp['cnts_ntl'] + ddf_pbp['cnts_thin_sat']
|
||||
ddf_pbp['cnts_unclassified'] = ddf_pbp['cnts_extreme_positive_timelag'] + ddf_pbp['cnts_thin_low_inc_scatt_ratio']
|
||||
|
||||
|
||||
|
||||
@ -1978,25 +1987,25 @@ def process_pbp_parquet(dir_path_pbp, dir_path_hk,
|
||||
# Calculate histograms of different classifications/flags:
|
||||
ddf_pbp['temporary_col'] = 1
|
||||
|
||||
dNdlogDmev, dMdlogDmev = process_hist_and_dist(ddf_pbp, 'BC mass within range', None, None, inc_mass_bin_lims, inc_mass_bin_ctrs, dt_str, flow=ddf_pbp_hk['Sample Flow Controller Read (vccm)'], rho_eff=rho_eff, BC_type=BC_type)
|
||||
dNdlogDmev_thin, dMdlogDmev_thin = process_hist_and_dist(ddf_pbp, 'BC mass within range', 'cnts_thin', 1, inc_mass_bin_lims, inc_mass_bin_ctrs, dt_str, flow=ddf_pbp_hk['Sample Flow Controller Read (vccm)'], rho_eff=rho_eff, BC_type=BC_type)
|
||||
dNdlogDmev_thin_noScatt, dMdlogDmev_thin_noScatt = process_hist_and_dist(ddf_pbp, 'BC mass within range', 'cnts_thin_noScatt', 1, inc_mass_bin_lims, inc_mass_bin_ctrs, dt_str, flow=ddf_pbp_hk['Sample Flow Controller Read (vccm)'], rho_eff=rho_eff, BC_type=BC_type)
|
||||
dNdlogDmev_thick, dMdlogDmev_thick = process_hist_and_dist(ddf_pbp, 'BC mass within range', 'cnts_thick', 1, inc_mass_bin_lims, inc_mass_bin_ctrs, dt_str, flow=ddf_pbp_hk['Sample Flow Controller Read (vccm)'], rho_eff=rho_eff, BC_type=BC_type)
|
||||
dNdlogDmev_thick_sat, dMdlogDmev_thick_sat = process_hist_and_dist(ddf_pbp, 'BC mass within range', 'cnts_thick_sat', 1, inc_mass_bin_lims, inc_mass_bin_ctrs, dt_str, flow=ddf_pbp_hk['Sample Flow Controller Read (vccm)'], rho_eff=rho_eff, BC_type=BC_type)
|
||||
dNdlogDmev_thin_sat, dMdlogDmev_thin_sat = process_hist_and_dist(ddf_pbp, 'BC mass within range', 'cnts_thin_sat', 1, inc_mass_bin_lims, inc_mass_bin_ctrs, dt_str, flow=ddf_pbp_hk['Sample Flow Controller Read (vccm)'], rho_eff=rho_eff, BC_type=BC_type)
|
||||
dNdlogDmev_ntl_sat, dMdlogDmev_ntl_sat = process_hist_and_dist(ddf_pbp, 'BC mass within range', 'cnts_ntl_sat', 1, inc_mass_bin_lims, inc_mass_bin_ctrs, dt_str, flow=ddf_pbp_hk['Sample Flow Controller Read (vccm)'], rho_eff=rho_eff, BC_type=BC_type)
|
||||
dNdlogDmev_ntl, dMdlogDmev_ntl = process_hist_and_dist(ddf_pbp, 'BC mass within range', 'cnts_ntl', 1, inc_mass_bin_lims, inc_mass_bin_ctrs, dt_str, flow=ddf_pbp_hk['Sample Flow Controller Read (vccm)'], rho_eff=rho_eff, BC_type=BC_type)
|
||||
dNdlogDmev_extreme_positive_timelag, dMdlogDmev_extreme_positive_timelag = process_hist_and_dist(ddf_pbp, 'BC mass within range', 'cnts_extreme_positive_timelag', 1, inc_mass_bin_lims, inc_mass_bin_ctrs, dt_str, flow=ddf_pbp_hk['Sample Flow Controller Read (vccm)'], rho_eff=rho_eff, BC_type=BC_type)
|
||||
dNdlogDmev_thin_low_inc_scatt_ratio, dMdlogDmev_thin_low_inc_scatt_ratio = process_hist_and_dist(ddf_pbp, 'BC mass within range', 'cnts_thin_low_inc_scatt_ratio', 1, inc_mass_bin_lims, inc_mass_bin_ctrs, dt_str, flow=ddf_pbp_hk['Sample Flow Controller Read (vccm)'], rho_eff=rho_eff, BC_type=BC_type)
|
||||
dNdlogDmev_thin_total, dMdlogDmev_thin_total = process_hist_and_dist(ddf_pbp, 'BC mass within range', 'cnts_thin_total', 1, inc_mass_bin_lims, inc_mass_bin_ctrs, dt_str, flow=ddf_pbp_hk['Sample Flow Controller Read (vccm)'], rho_eff=rho_eff, BC_type=BC_type)
|
||||
dNdlogDmev_thick_total, dMdlogDmev_thick_total = process_hist_and_dist(ddf_pbp, 'BC mass within range', 'cnts_thick_total', 1, inc_mass_bin_lims, inc_mass_bin_ctrs, dt_str, flow=ddf_pbp_hk['Sample Flow Controller Read (vccm)'], rho_eff=rho_eff, BC_type=BC_type)
|
||||
dNdlogDmev_unclassified, dMdlogDmev_unclassified = process_hist_and_dist(ddf_pbp, 'BC mass within range', 'cnts_unclassified', 1, inc_mass_bin_lims, inc_mass_bin_ctrs, dt_str, flow=ddf_pbp_hk['Sample Flow Controller Read (vccm)'], rho_eff=rho_eff, BC_type=BC_type)
|
||||
dNdlogDmev, dMdlogDmev = process_hist_and_dist(ddf_pbp, 'BC mass within range', None, None, inc_mass_bin_lims, inc_mass_bin_ctrs, dt_str, flow=ddf_pbp_hk['Sample Flow Controller Read (vccm)'], rho_eff=rho_eff, BC_type=BC_type, t=dt)
|
||||
dNdlogDmev_thin, dMdlogDmev_thin = process_hist_and_dist(ddf_pbp, 'BC mass within range', 'cnts_thin', 1, inc_mass_bin_lims, inc_mass_bin_ctrs, dt_str, flow=ddf_pbp_hk['Sample Flow Controller Read (vccm)'], rho_eff=rho_eff, BC_type=BC_type, t=dt)
|
||||
dNdlogDmev_thin_noScatt, dMdlogDmev_thin_noScatt = process_hist_and_dist(ddf_pbp, 'BC mass within range', 'cnts_thin_noScatt', 1, inc_mass_bin_lims, inc_mass_bin_ctrs, dt_str, flow=ddf_pbp_hk['Sample Flow Controller Read (vccm)'], rho_eff=rho_eff, BC_type=BC_type, t=dt)
|
||||
dNdlogDmev_thick, dMdlogDmev_thick = process_hist_and_dist(ddf_pbp, 'BC mass within range', 'cnts_thick', 1, inc_mass_bin_lims, inc_mass_bin_ctrs, dt_str, flow=ddf_pbp_hk['Sample Flow Controller Read (vccm)'], rho_eff=rho_eff, BC_type=BC_type, t=dt)
|
||||
dNdlogDmev_thick_sat, dMdlogDmev_thick_sat = process_hist_and_dist(ddf_pbp, 'BC mass within range', 'cnts_thick_sat', 1, inc_mass_bin_lims, inc_mass_bin_ctrs, dt_str, flow=ddf_pbp_hk['Sample Flow Controller Read (vccm)'], rho_eff=rho_eff, BC_type=BC_type, t=dt)
|
||||
dNdlogDmev_thin_sat, dMdlogDmev_thin_sat = process_hist_and_dist(ddf_pbp, 'BC mass within range', 'cnts_thin_sat', 1, inc_mass_bin_lims, inc_mass_bin_ctrs, dt_str, flow=ddf_pbp_hk['Sample Flow Controller Read (vccm)'], rho_eff=rho_eff, BC_type=BC_type, t=dt)
|
||||
dNdlogDmev_ntl_sat, dMdlogDmev_ntl_sat = process_hist_and_dist(ddf_pbp, 'BC mass within range', 'cnts_ntl_sat', 1, inc_mass_bin_lims, inc_mass_bin_ctrs, dt_str, flow=ddf_pbp_hk['Sample Flow Controller Read (vccm)'], rho_eff=rho_eff, BC_type=BC_type, t=dt)
|
||||
dNdlogDmev_ntl, dMdlogDmev_ntl = process_hist_and_dist(ddf_pbp, 'BC mass within range', 'cnts_ntl', 1, inc_mass_bin_lims, inc_mass_bin_ctrs, dt_str, flow=ddf_pbp_hk['Sample Flow Controller Read (vccm)'], rho_eff=rho_eff, BC_type=BC_type, t=dt)
|
||||
dNdlogDmev_extreme_positive_timelag, dMdlogDmev_extreme_positive_timelag = process_hist_and_dist(ddf_pbp, 'BC mass within range', 'cnts_extreme_positive_timelag', 1, inc_mass_bin_lims, inc_mass_bin_ctrs, dt_str, flow=ddf_pbp_hk['Sample Flow Controller Read (vccm)'], rho_eff=rho_eff, BC_type=BC_type, t=dt)
|
||||
dNdlogDmev_thin_low_inc_scatt_ratio, dMdlogDmev_thin_low_inc_scatt_ratio = process_hist_and_dist(ddf_pbp, 'BC mass within range', 'cnts_thin_low_inc_scatt_ratio', 1, inc_mass_bin_lims, inc_mass_bin_ctrs, dt_str, flow=ddf_pbp_hk['Sample Flow Controller Read (vccm)'], rho_eff=rho_eff, BC_type=BC_type, t=dt)
|
||||
dNdlogDmev_thin_total, dMdlogDmev_thin_total = process_hist_and_dist(ddf_pbp, 'BC mass within range', 'cnts_thin_total', 1, inc_mass_bin_lims, inc_mass_bin_ctrs, dt_str, flow=ddf_pbp_hk['Sample Flow Controller Read (vccm)'], rho_eff=rho_eff, BC_type=BC_type, t=dt)
|
||||
dNdlogDmev_thick_total, dMdlogDmev_thick_total = process_hist_and_dist(ddf_pbp, 'BC mass within range', 'cnts_thick_total', 1, inc_mass_bin_lims, inc_mass_bin_ctrs, dt_str, flow=ddf_pbp_hk['Sample Flow Controller Read (vccm)'], rho_eff=rho_eff, BC_type=BC_type, t=dt)
|
||||
dNdlogDmev_unclassified, dMdlogDmev_unclassified = process_hist_and_dist(ddf_pbp, 'BC mass within range', 'cnts_unclassified', 1, inc_mass_bin_lims, inc_mass_bin_ctrs, dt_str, flow=ddf_pbp_hk['Sample Flow Controller Read (vccm)'], rho_eff=rho_eff, BC_type=BC_type, t=dt)
|
||||
|
||||
|
||||
scatt_bin_lims = np.logspace(np.log10(minOptD), np.log10(maxOptD), n_scattbins)
|
||||
scatt_bin_ctrs = bin_lims_to_ctrs(scatt_bin_lims)
|
||||
|
||||
dNdlogDsc, _ = process_hist_and_dist(ddf_pbp, 'Opt diam scatt only', None, None, scatt_bin_lims, scatt_bin_ctrs, dt_str, flow=ddf_pbp_hk['Sample Flow Controller Read (vccm)'], rho_eff=None, BC_type=None)
|
||||
dNdlogDsc, _ = process_hist_and_dist(ddf_pbp, 'Opt diam scatt only', None, None, scatt_bin_lims, scatt_bin_ctrs, dt_str, flow=ddf_pbp_hk['Sample Flow Controller Read (vccm)'], rho_eff=None, BC_type=None, t=dt)
|
||||
|
||||
|
||||
|
||||
@ -2005,8 +2014,8 @@ def process_pbp_parquet(dir_path_pbp, dir_path_hk,
|
||||
|
||||
list_hist = []
|
||||
for idx, (name, group) in enumerate(ddf_pbp.groupby('BC mass bin')):
|
||||
a, _ = process_hist_and_dist(group, 'time_lag_new', 'cnts_particles_for_tl_dist', 1, timelag_bins_lims, timelag_bin_ctrs, dt_str, calculate_conc=False, flow=ddf_pbp_hk['Sample Flow Controller Read (vccm)'], rho_eff=None, BC_type=None, t=1)
|
||||
a.columns = [f'BC_mass_{inc_mass_bin_ctrs[idx]:.2f}_timelag_{i:.1f}' for i in timelag_bin_ctrs]
|
||||
a, _ = process_hist_and_dist(group, 'time_lag_new', 'cnts_particles_for_tl_dist', 1, timelag_bins_lims, timelag_bin_ctrs, dt_str, calculate_conc=True, flow=ddf_pbp_hk['Sample Flow Controller Read (vccm)'], rho_eff=None, BC_type=None, t=dt)
|
||||
a.columns = [f'dNdlogDmev_{inc_mass_bin_ctrs[idx]:.2f}_timelag_{i:.1f}' for i in timelag_bin_ctrs]
|
||||
|
||||
list_hist.append(a)
|
||||
time_lag_hists = pd.concat(list_hist, axis=1)
|
||||
@ -2045,7 +2054,7 @@ def process_pbp_parquet(dir_path_pbp, dir_path_hk,
|
||||
if save_final_data == True:
|
||||
dd.from_pandas(final_df.sort_index(), npartitions=1).to_parquet(path = path_parquet,
|
||||
engine='pyarrow',
|
||||
partition_on=['date', 'hour'],
|
||||
partition_on=partition_on,
|
||||
write_index=True
|
||||
)
|
||||
|
||||
|
Reference in New Issue
Block a user