style: apply Black auto-formatting

Bertozzi Barbara
2025-07-24 20:33:12 +02:00
parent 68f466c139
commit 51a005f2da
3 changed files with 32 additions and 33 deletions

[File 1 of 3: the SP2XR_toolkit Python module]

@@ -24,7 +24,7 @@ from dask import delayed
 import time
 import ipywidgets as widgets
-from IPython.display import display, clear_output
+from IPython.display import display
 from matplotlib.backends.backend_pdf import PdfPages
@@ -381,7 +381,7 @@ def read_sp2b_from_sp2xr_zipped_2(file_path, meta, target_directory):
     def process_block(f, file_size):
         while f.tell() < file_size:
-            initial_pos = f.tell()
+            # initial_pos = f.tell()
             size_2d_array = struct.unpack("> 2i", f.read(8))
             if size_2d_array[0] == 0:
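Note: the loop being edited walks a binary .sp2b record stream; struct.unpack("> 2i", f.read(8)) pulls two big-endian 32-bit integers that size the 2-D array block that follows, and a zero first dimension ends the scan. Below is a minimal self-contained sketch of that pattern; the payload layout and the zero-sentinel are assumptions for illustration, since the real SP2-XR record has more structure.

import io
import struct

def read_blocks(f, file_size):
    # Walk a binary stream: each block starts with two big-endian int32s
    # giving the dimensions of the 2-D payload that follows.
    while f.tell() < file_size:
        rows, cols = struct.unpack("> 2i", f.read(8))
        if rows == 0:
            break  # hypothetical sentinel: a zero dimension ends the stream
        payload = f.read(rows * cols * 4)  # assume int32 payload values
        yield rows, cols, payload

# One 2x3 block of zeros followed by a (0, 0) sentinel header
buf = struct.pack("> 2i", 2, 3) + b"\x00" * 24 + struct.pack("> 2i", 0, 0)
for rows, cols, payload in read_blocks(io.BytesIO(buf), len(buf)):
    print(rows, cols, len(payload))  # -> 2 3 24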
@@ -575,7 +575,7 @@ def read_sp2b_from_sp2xr_zipped(file_path, meta):
     def process_block(f, file_size):
         while f.tell() < file_size:
-            initial_pos = f.tell()
+            # initial_pos = f.tell()
             size_2d_array = struct.unpack("> 2i", f.read(8))
             if size_2d_array[0] == 0:
@@ -687,7 +687,7 @@ def read_sp2b_from_sp2xr_zipped(file_path, meta):
     return meta
 
 
-def read_and_process_sp2b(matches, target_directory, meta_file):
+def read_and_process_sp2b(matches, target_directory, meta_file, file_path):
     if len(matches) > 0:
         delayed_results = [
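Note: read_and_process_sp2b gains a file_path argument here, and its body appears to fan the matched files out as dask.delayed tasks (delayed_results = [...]). A minimal sketch of that fan-out/compute pattern, with a placeholder worker standing in for the real per-file reader:

import dask
from dask import delayed

def process_one(path):
    # stand-in for per-file work such as read_sp2b_from_sp2xr_zipped_2
    return len(path)

matches = ["a.sp2b", "b.sp2b", "c.sp2b"]
if len(matches) > 0:
    delayed_results = [delayed(process_one)(m) for m in matches]
    results = dask.compute(*delayed_results)  # executes the task graph in parallel
    print(results)  # -> (6, 6, 6)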
@@ -2116,19 +2116,19 @@ def process_pbp_parquet(
         ddf_pbp["BC mass"] = ddf_pbp["Incand relPeak"].apply(
             lambda x: polynomial(x, *inc_calib_params)
         )  # , meta=('BC mass', 'float64'))
-        minM_timelag = polynomial(5e6, *inc_calib_params)
+        # minM_timelag = polynomial(5e6, *inc_calib_params)
     elif inc_calib_curve == "powerlaw":
         ddf_pbp["BC mass"] = ddf_pbp["Incand relPeak"].apply(
             lambda x: powerlaw(x, inc_calib_params)
         )  # , meta=('BC mass', 'float64'))
-        minM_timelag = powerlaw(5e6, inc_calib_params)
+        # minM_timelag = powerlaw(5e6, inc_calib_params)
     else:
         ddf_pbp["BC mass"] = ddf_pbp["Incand Mass (fg)"]
-        minM_timelag = ddf_pbp.loc[
-            (ddf_pbp["Incand relPeak"] <= 5.05e6)
-            & (ddf_pbp["Incand relPeak"] >= 4.95e6),
-            "Incand Mass (fg)",
-        ].mean()  # this could create problems if there are no incandescence signals in the selected range
+        # minM_timelag = ddf_pbp.loc[
+        #     (ddf_pbp["Incand relPeak"] <= 5.05e6)
+        #     & (ddf_pbp["Incand relPeak"] >= 4.95e6),
+        #     "Incand Mass (fg)",
+        # ].mean()  # this could create problems if there are no incandescence signals in the selected range
         ddf_pbp.loc[ddf_pbp["Incand relPeak"] == 0, "BC mass"] = np.nan
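For orientation, this block converts the incandescence peak height into black-carbon mass through a calibration curve. The toolkit's polynomial and powerlaw functions are not shown in this diff, so the versions below are assumptions inferred from the call signatures (polynomial(x, *params), powerlaw(x, params)), with a toy linear calibration:

import numpy as np
import pandas as pd

def polynomial(x, *coeffs):
    # assumed form: coefficients ordered from lowest to highest degree
    return sum(c * x**i for i, c in enumerate(coeffs))

def powerlaw(x, params):
    # assumed form: params = (a, b) for a * x**b
    a, b = params
    return a * x**b

peaks = pd.Series([1e6, 5e6, 0.0], name="Incand relPeak")
bc_mass = peaks.apply(lambda x: polynomial(x, 0.0, 2e-6))  # toy linear calibration
bc_mass[peaks == 0] = np.nan  # zero peak height = no incandescence signal
print(bc_mass.tolist())  # -> [2.0, 10.0, nan]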
@@ -2165,7 +2165,7 @@ def process_pbp_parquet(
     flag_inc_fwhm = (ddf_pbp["Incand FWHM"] >= ini_params["IncFWHMMin"]) & (
         ddf_pbp["Incand FWHM"] <= ini_params["IncFWHMMax"]
     )
-    flag_inc_not_sat = ddf_pbp["Incand relPeak"] < ini_params["IncSatPoint"]
+    # flag_inc_not_sat = ddf_pbp["Incand relPeak"] < ini_params["IncSatPoint"]
     flag_scatt_transit_time = (
         ddf_pbp["Scatter Transit Time"] >= ini_params["ScattTransitMin"]
@@ -2179,9 +2179,9 @@ def process_pbp_parquet(
     flag_inc_in_range = (
         flag_inc & (ddf_pbp["BC mass"] >= minM) & (ddf_pbp["BC mass"] <= maxM)
     )
-    flag_inc_in_range_tl_analysis = (
-        flag_inc & (ddf_pbp["BC mass"] >= minM_timelag) & (ddf_pbp["BC mass"] <= maxM)
-    )
+    # flag_inc_in_range_tl_analysis = (
+    #     flag_inc & (ddf_pbp["BC mass"] >= minM_timelag) & (ddf_pbp["BC mass"] <= maxM)
+    # )
     flag_scatt = flag_scatt_transit_time & flag_scatt_fwhm
     flag_scatt_in_range = (
@@ -2711,7 +2711,7 @@ def process_pbp_parquet(
     final_df["hour"] = final_df.index.hour
     final_df["date"] = final_df["date"].astype("date64[pyarrow]")
-    if save_final_data == True:
+    if save_final_data:
         dd.from_pandas(final_df.sort_index(), npartitions=1).to_parquet(
             path=path_parquet,
             engine="pyarrow",
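The change from save_final_data == True to a bare truthiness test follows PEP 8 (pycodestyle E712); the two only agree when the flag is an actual bool, as a quick check shows:

for flag in (True, 1, "yes"):
    print(flag == True, bool(flag))
# True True
# True True    (1 == True because bool is a subclass of int)
# False True   ("yes" is truthy but not equal to True)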
@@ -2777,11 +2777,11 @@ def resample_to_dt(dir_path_pbp, dt=60, path_parquet="", save_final_data=False):
         ]
     ]
     cols_for_sum = timelag_hist_cols + cnts_cols + addiotnal_cols
-    cols_for_count = ["temporary_col"]
+    # cols_for_count = ["temporary_col"]
     data_resampled_mean = dd_data[cols_for_mean].fillna(0).resample(f"{dt}s").mean()
     data_resampled_sum = dd_data[cols_for_sum].fillna(0).resample(f"{dt}s").sum()
-    data_resampled_count = dd_data[cols_for_count].resample(f"{dt}s").count()
+    # data_resampled_count = dd_data[cols_for_count].resample(f"{dt}s").count()
     # merged = dd.merge(data_resampled_mean, data_resampled_sum, left_index=True, right_index=True, how='outer')
     merged = pd.concat([data_resampled_mean, data_resampled_sum], axis=1)
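This resampling step aggregates particle-by-particle data onto a fixed dt-second grid, averaging some columns and summing others before joining the results column-wise. A pandas-only sketch of the same pattern (the column names here are hypothetical stand-ins):

import numpy as np
import pandas as pd

dt = 60  # resampling interval in seconds
idx = pd.date_range("2024-05-27", periods=180, freq="s")
df = pd.DataFrame({"BC mass": np.random.rand(180), "cnt_incand": 1}, index=idx)

mean_part = df[["BC mass"]].fillna(0).resample(f"{dt}s").mean()
sum_part = df[["cnt_incand"]].fillna(0).resample(f"{dt}s").sum()
merged = pd.concat([mean_part, sum_part], axis=1)
print(merged)  # three 60 s rows: mean BC mass and particle count per window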

[File 2 of 3: a Jupyter notebook]

@@ -2,18 +2,15 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": null,
    "id": "4e2fec67-a2b4-4ca5-80e0-71ed4f54f58f",
    "metadata": {},
    "outputs": [],
    "source": [
     "import time\n",
     "import pandas as pd\n",
-    "import numpy as np\n",
-    "import sys\n",
-    "import pickle\n",
     "\n",
-    "from SP2XR_toolkit import *"
+    "from SP2XR_toolkit import calculate_calib_coeff"
    ]
   },
   {

[File 3 of 3: a batch-processing script]

@@ -7,21 +7,23 @@ Created on Mon May 27 11:45:19 2024
 import time
 import pandas as pd
 import numpy as np
-import sys
-import pickle
 import dask.dataframe as dd
 import dask
 from dask.distributed import Client
 from dask_jobqueue import SLURMCluster
 import datetime
 import struct
 import zipfile
 from dask import delayed
 import itertools
 import gc
-from SP2XR_toolkit import *
+from SP2XR_toolkit import (
+    find_files,
+    read_and_process_sp2b,
+    read_csv_files_with_dask_2,
+    process_sp2b_parquet,
+    resample_to_dt,
+    chunks,
+    list_first_level_subdirs,
+    process_pbp_parquet,
+    get_file_dict,
+)
 
 # %% Define directories and folders
@@ -53,7 +55,7 @@ meta_file_sp2b = pd.read_parquet(
 matching_files_pbp = find_files(source_directory, filter_string_pbp)
 matching_files_hk = find_files(source_directory, filter_string_hk)
-# matching_files_sp2b = find_files(source_directory, filter_string_sp2b)[10000:50000]
+matching_files_sp2b = find_files(source_directory, filter_string_sp2b)[10000:50000]
 
 # %% PBP: From csv/zip to parquet
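The script slices the sp2b match list ([10000:50000]) to work on a subset of files per run, and the toolkit import list includes a chunks helper, presumably for splitting such lists into batches. A sketch under that assumption (the real chunks helper may differ):

def chunks(lst, n):
    # assumed helper: yield successive n-sized batches from lst
    for i in range(0, len(lst), n):
        yield lst[i : i + n]

matching_files_sp2b = [f"file_{i:05d}.sp2b" for i in range(100)]  # stand-in list
subset = matching_files_sp2b[10:50]  # same slicing idea as in the script
for batch in chunks(subset, 16):
    print(len(batch))  # -> 16, 16, 8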