From 51a005f2da1c9aa3f70ec357d280e1d20de4fe63 Mon Sep 17 00:00:00 2001
From: Bertozzi Barbara
Date: Thu, 24 Jul 2025 20:33:12 +0200
Subject: [PATCH] style: apply Black auto-formatting

---
 SP2XR_toolkit.py               | 36 ++++++++++++++++++------------------
 example_calibration_code.ipynb |  7 ++-----
 example_processing_code.py     | 22 ++++++++++++----------
 3 files changed, 32 insertions(+), 33 deletions(-)

diff --git a/SP2XR_toolkit.py b/SP2XR_toolkit.py
index a495ef7..a9c4fb3 100644
--- a/SP2XR_toolkit.py
+++ b/SP2XR_toolkit.py
@@ -24,7 +24,7 @@ from dask import delayed
 import time
 import ipywidgets as widgets
 
-from IPython.display import display, clear_output
+from IPython.display import display
 
 from matplotlib.backends.backend_pdf import PdfPages
 
@@ -381,7 +381,7 @@ def read_sp2b_from_sp2xr_zipped_2(file_path, meta, target_directory):
 
     def process_block(f, file_size):
         while f.tell() < file_size:
-            initial_pos = f.tell()
+            # initial_pos = f.tell()
             size_2d_array = struct.unpack("> 2i", f.read(8))
 
             if size_2d_array[0] == 0:
@@ -575,7 +575,7 @@ def read_sp2b_from_sp2xr_zipped(file_path, meta):
 
     def process_block(f, file_size):
         while f.tell() < file_size:
-            initial_pos = f.tell()
+            # initial_pos = f.tell()
             size_2d_array = struct.unpack("> 2i", f.read(8))
 
             if size_2d_array[0] == 0:
@@ -687,7 +687,7 @@ def read_sp2b_from_sp2xr_zipped(file_path, meta):
     return meta
 
 
-def read_and_process_sp2b(matches, target_directory, meta_file):
+def read_and_process_sp2b(matches, target_directory, meta_file, file_path):
     if len(matches) > 0:
 
         delayed_results = [
@@ -2116,19 +2116,19 @@ def process_pbp_parquet(
         ddf_pbp["BC mass"] = ddf_pbp["Incand relPeak"].apply(
             lambda x: polynomial(x, *inc_calib_params)
         ) # , meta=('BC mass', 'float64'))
-        minM_timelag = polynomial(5e6, *inc_calib_params)
+        # minM_timelag = polynomial(5e6, *inc_calib_params)
     elif inc_calib_curve == "powerlaw":
         ddf_pbp["BC mass"] = ddf_pbp["Incand relPeak"].apply(
             lambda x: powerlaw(x, inc_calib_params)
         ) # , meta=('BC mass', 'float64'))
-        minM_timelag = powerlaw(5e6, inc_calib_params)
+        # minM_timelag = powerlaw(5e6, inc_calib_params)
     else:
         ddf_pbp["BC mass"] = ddf_pbp["Incand Mass (fg)"]
-        minM_timelag = ddf_pbp.loc[
-            (ddf_pbp["Incand relPeak"] <= 5.05e6)
-            & (ddf_pbp["Incand relPeak"] >= 4.95e6),
-            "Incand Mass (fg)",
-        ].mean() # this could create problems if there are no incandescence signals in the selected range
+        # minM_timelag = ddf_pbp.loc[
+        #     (ddf_pbp["Incand relPeak"] <= 5.05e6)
+        #     & (ddf_pbp["Incand relPeak"] >= 4.95e6),
+        #     "Incand Mass (fg)",
+        # ].mean() # this could create problems if there are no incandescence signals in the selected range
 
     ddf_pbp.loc[ddf_pbp["Incand relPeak"] == 0, "BC mass"] = np.nan
 
@@ -2165,7 +2165,7 @@ def process_pbp_parquet(
     flag_inc_fwhm = (ddf_pbp["Incand FWHM"] >= ini_params["IncFWHMMin"]) & (
         ddf_pbp["Incand FWHM"] <= ini_params["IncFWHMMax"]
     )
-    flag_inc_not_sat = ddf_pbp["Incand relPeak"] < ini_params["IncSatPoint"]
+    # flag_inc_not_sat = ddf_pbp["Incand relPeak"] < ini_params["IncSatPoint"]
 
     flag_scatt_transit_time = (
         ddf_pbp["Scatter Transit Time"] >= ini_params["ScattTransitMin"]
@@ -2179,9 +2179,9 @@ def process_pbp_parquet(
     flag_inc_in_range = (
         flag_inc & (ddf_pbp["BC mass"] >= minM) & (ddf_pbp["BC mass"] <= maxM)
     )
-    flag_inc_in_range_tl_analysis = (
-        flag_inc & (ddf_pbp["BC mass"] >= minM_timelag) & (ddf_pbp["BC mass"] <= maxM)
-    )
+    # flag_inc_in_range_tl_analysis = (
+    #     flag_inc & (ddf_pbp["BC mass"] >= minM_timelag) & (ddf_pbp["BC mass"] <= maxM)
+    # )
 
     flag_scatt = flag_scatt_transit_time & flag_scatt_fwhm
     flag_scatt_in_range = (
@@ -2711,7 +2711,7 @@ def process_pbp_parquet(
     final_df["hour"] = final_df.index.hour
     final_df["date"] = final_df["date"].astype("date64[pyarrow]")
 
-    if save_final_data == True:
+    if save_final_data:
         dd.from_pandas(final_df.sort_index(), npartitions=1).to_parquet(
             path=path_parquet,
             engine="pyarrow",
@@ -2777,11 +2777,11 @@ def resample_to_dt(dir_path_pbp, dt=60, path_parquet="", save_final_data=False):
         ]
     ]
     cols_for_sum = timelag_hist_cols + cnts_cols + addiotnal_cols
-    cols_for_count = ["temporary_col"]
+    # cols_for_count = ["temporary_col"]
 
     data_resampled_mean = dd_data[cols_for_mean].fillna(0).resample(f"{dt}s").mean()
     data_resampled_sum = dd_data[cols_for_sum].fillna(0).resample(f"{dt}s").sum()
-    data_resampled_count = dd_data[cols_for_count].resample(f"{dt}s").count()
+    # data_resampled_count = dd_data[cols_for_count].resample(f"{dt}s").count()
 
     # merged = dd.merge(data_resampled_mean, data_resampled_sum, left_index=True, right_index=True, how='outer')
     merged = pd.concat([data_resampled_mean, data_resampled_sum], axis=1)
diff --git a/example_calibration_code.ipynb b/example_calibration_code.ipynb
index b281f58..cbff607 100644
--- a/example_calibration_code.ipynb
+++ b/example_calibration_code.ipynb
@@ -2,18 +2,15 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": null,
    "id": "4e2fec67-a2b4-4ca5-80e0-71ed4f54f58f",
    "metadata": {},
    "outputs": [],
    "source": [
-    "import time\n",
     "import pandas as pd\n",
     "import numpy as np\n",
-    "import sys\n",
-    "import pickle\n",
     "\n",
-    "from SP2XR_toolkit import *"
+    "from SP2XR_toolkit import calculate_calib_coeff"
    ]
   },
   {
diff --git a/example_processing_code.py b/example_processing_code.py
index 687c458..c01193b 100644
--- a/example_processing_code.py
+++ b/example_processing_code.py
@@ -7,21 +7,23 @@ Created on Mon May 27 11:45:19 2024
 
 import time
 import pandas as pd
-import numpy as np
-import sys
-import pickle
 import dask.dataframe as dd
 import dask
 from dask.distributed import Client
 from dask_jobqueue import SLURMCluster
-import datetime
-import struct
-import zipfile
-from dask import delayed
-import itertools
 import gc
 
-from SP2XR_toolkit import *
+from SP2XR_toolkit import (
+    find_files,
+    read_and_process_sp2b,
+    read_csv_files_with_dask_2,
+    process_sp2b_parquet,
+    resample_to_dt,
+    chunks,
+    list_first_level_subdirs,
+    process_pbp_parquet,
+    get_file_dict,
+)
 
 
 # %% Define directories and folders
@@ -53,7 +55,7 @@ meta_file_sp2b = pd.read_parquet(
 
 matching_files_pbp = find_files(source_directory, filter_string_pbp)
 matching_files_hk = find_files(source_directory, filter_string_hk)
-# matching_files_sp2b = find_files(source_directory, filter_string_sp2b)[10000:50000]
+matching_files_sp2b = find_files(source_directory, filter_string_sp2b)[10000:50000]
 
 
 # %% PBP: From csv/zip to parquet