style: apply Black auto-formatting
@@ -24,7 +24,7 @@ from dask import delayed
 import time

 import ipywidgets as widgets
-from IPython.display import display, clear_output
+from IPython.display import display


 from matplotlib.backends.backend_pdf import PdfPages
@@ -381,7 +381,7 @@ def read_sp2b_from_sp2xr_zipped_2(file_path, meta, target_directory):

     def process_block(f, file_size):
         while f.tell() < file_size:
-            initial_pos = f.tell()
+            # initial_pos = f.tell()
             size_2d_array = struct.unpack("> 2i", f.read(8))

             if size_2d_array[0] == 0:
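Note: struct.unpack("> 2i", f.read(8)) consumes an 8-byte record header as two big-endian 32-bit integers (whitespace inside a struct format string is ignored). A minimal sketch of this framing loop, assuming a 2-D int16 payload whose shape is given by the header; the real sp2b record layout is not visible in this diff, so iter_block_headers and the payload element size are illustrative only:

    import struct

    def iter_block_headers(f, file_size):
        # Walk the stream block by block; each block opens with two
        # big-endian int32 values (read here as rows and cols).
        while f.tell() < file_size:
            header = f.read(8)
            if len(header) < 8:
                break  # truncated trailing block
            rows, cols = struct.unpack("> 2i", header)
            if rows == 0:
                continue  # header-only block, nothing to skip
            yield rows, cols, f.tell()
            f.seek(rows * cols * 2, 1)  # skip payload, assuming int16 samples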
@@ -575,7 +575,7 @@ def read_sp2b_from_sp2xr_zipped(file_path, meta):

     def process_block(f, file_size):
         while f.tell() < file_size:
-            initial_pos = f.tell()
+            # initial_pos = f.tell()
             size_2d_array = struct.unpack("> 2i", f.read(8))

             if size_2d_array[0] == 0:
@@ -687,7 +687,7 @@ def read_sp2b_from_sp2xr_zipped(file_path, meta):
     return meta


-def read_and_process_sp2b(matches, target_directory, meta_file):
+def read_and_process_sp2b(matches, target_directory, meta_file, file_path):
    if len(matches) > 0:

        delayed_results = [
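For orientation, the `delayed_results = [` line opens the usual dask fan-out: one lazy task per matched sp2b file, executed together later. A minimal sketch of the pattern, assuming a placeholder per-file worker (process_one_sp2b is hypothetical; the toolkit's actual worker and argument list are not shown in this diff):

    import dask
    from dask import delayed

    def process_one_sp2b(match, target_directory, meta_file):
        # Placeholder for the real per-file reader/processor.
        return match

    def read_and_process_sp2b_sketch(matches, target_directory, meta_file):
        if len(matches) > 0:
            # Build one lazy task per file; nothing runs until compute().
            delayed_results = [
                delayed(process_one_sp2b)(m, target_directory, meta_file)
                for m in matches
            ]
            return dask.compute(*delayed_results)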
@@ -2116,19 +2116,19 @@ def process_pbp_parquet(
         ddf_pbp["BC mass"] = ddf_pbp["Incand relPeak"].apply(
             lambda x: polynomial(x, *inc_calib_params)
         )  # , meta=('BC mass', 'float64'))
-        minM_timelag = polynomial(5e6, *inc_calib_params)
+        # minM_timelag = polynomial(5e6, *inc_calib_params)
     elif inc_calib_curve == "powerlaw":
         ddf_pbp["BC mass"] = ddf_pbp["Incand relPeak"].apply(
             lambda x: powerlaw(x, inc_calib_params)
         )  # , meta=('BC mass', 'float64'))
-        minM_timelag = powerlaw(5e6, inc_calib_params)
+        # minM_timelag = powerlaw(5e6, inc_calib_params)
     else:
         ddf_pbp["BC mass"] = ddf_pbp["Incand Mass (fg)"]
-        minM_timelag = ddf_pbp.loc[
-            (ddf_pbp["Incand relPeak"] <= 5.05e6)
-            & (ddf_pbp["Incand relPeak"] >= 4.95e6),
-            "Incand Mass (fg)",
-        ].mean()  # this could create problems if there are no incandescence signals in the selected range
+        # minM_timelag = ddf_pbp.loc[
+        #     (ddf_pbp["Incand relPeak"] <= 5.05e6)
+        #     & (ddf_pbp["Incand relPeak"] >= 4.95e6),
+        #     "Incand Mass (fg)",
+        # ].mean()  # this could create problems if there are no incandescence signals in the selected range

     ddf_pbp.loc[ddf_pbp["Incand relPeak"] == 0, "BC mass"] = np.nan

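The trailing comment on the removed `.mean()` names a genuine failure mode: when no particles have `Incand relPeak` inside [4.95e6, 5.05e6], the selection is empty and `.mean()` returns NaN, which then propagates into every threshold built from `minM_timelag`. A hedged guard for that case, sketched on a pandas frame with the column names taken from the diff (the fallback policy is illustrative, not the toolkit's behaviour):

    import pandas as pd

    def min_mass_for_timelag(df, lo=4.95e6, hi=5.05e6, fallback=None):
        # Mean incandescent mass over the calibration window [lo, hi];
        # fail loudly (or fall back) instead of returning a silent NaN.
        sel = df.loc[
            (df["Incand relPeak"] >= lo) & (df["Incand relPeak"] <= hi),
            "Incand Mass (fg)",
        ]
        if len(sel) == 0:
            if fallback is None:
                raise ValueError("no incandescence signals in the calibration window")
            return fallback
        return sel.mean()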
@@ -2165,7 +2165,7 @@ def process_pbp_parquet(
     flag_inc_fwhm = (ddf_pbp["Incand FWHM"] >= ini_params["IncFWHMMin"]) & (
         ddf_pbp["Incand FWHM"] <= ini_params["IncFWHMMax"]
     )
-    flag_inc_not_sat = ddf_pbp["Incand relPeak"] < ini_params["IncSatPoint"]
+    # flag_inc_not_sat = ddf_pbp["Incand relPeak"] < ini_params["IncSatPoint"]

     flag_scatt_transit_time = (
         ddf_pbp["Scatter Transit Time"] >= ini_params["ScattTransitMin"]
@@ -2179,9 +2179,9 @@ def process_pbp_parquet(
     flag_inc_in_range = (
         flag_inc & (ddf_pbp["BC mass"] >= minM) & (ddf_pbp["BC mass"] <= maxM)
     )
-    flag_inc_in_range_tl_analysis = (
-        flag_inc & (ddf_pbp["BC mass"] >= minM_timelag) & (ddf_pbp["BC mass"] <= maxM)
-    )
+    # flag_inc_in_range_tl_analysis = (
+    #     flag_inc & (ddf_pbp["BC mass"] >= minM_timelag) & (ddf_pbp["BC mass"] <= maxM)
+    # )

     flag_scatt = flag_scatt_transit_time & flag_scatt_fwhm
     flag_scatt_in_range = (
@@ -2711,7 +2711,7 @@ def process_pbp_parquet(
     final_df["hour"] = final_df.index.hour
     final_df["date"] = final_df["date"].astype("date64[pyarrow]")

-    if save_final_data == True:
+    if save_final_data:
         dd.from_pandas(final_df.sort_index(), npartitions=1).to_parquet(
             path=path_parquet,
             engine="pyarrow",
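`dd.from_pandas(..., npartitions=1)` routes a plain pandas frame through dask solely to reuse its parquet writer; with a single partition the dataset is written as one part file. A self-contained sketch of the call (the path and columns are placeholders):

    import pandas as pd
    import dask.dataframe as dd

    final_df = pd.DataFrame(
        {"BC mass": [1.0, 2.0]},
        index=pd.to_datetime(["2024-05-27 00:00", "2024-05-27 00:01"]),
    )

    dd.from_pandas(final_df.sort_index(), npartitions=1).to_parquet(
        path="final_parquet",  # placeholder output directory
        engine="pyarrow",
        write_index=True,
    )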
@@ -2777,11 +2777,11 @@ def resample_to_dt(dir_path_pbp, dt=60, path_parquet="", save_final_data=False):
         ]
     ]
     cols_for_sum = timelag_hist_cols + cnts_cols + addiotnal_cols
-    cols_for_count = ["temporary_col"]
+    # cols_for_count = ["temporary_col"]

     data_resampled_mean = dd_data[cols_for_mean].fillna(0).resample(f"{dt}s").mean()
     data_resampled_sum = dd_data[cols_for_sum].fillna(0).resample(f"{dt}s").sum()
-    data_resampled_count = dd_data[cols_for_count].resample(f"{dt}s").count()
+    # data_resampled_count = dd_data[cols_for_count].resample(f"{dt}s").count()

     # merged = dd.merge(data_resampled_mean, data_resampled_sum, left_index=True, right_index=True, how='outer')
     merged = pd.concat([data_resampled_mean, data_resampled_sum], axis=1)
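The two resample passes aggregate different column families onto the same dt-second grid: means for intensive quantities, sums for counts and histogram bins. Because both results share the resampled DatetimeIndex, a column-wise `pd.concat` aligns them without a merge key. A compact pandas illustration (the column names here are invented for the example):

    import numpy as np
    import pandas as pd

    idx = pd.date_range("2024-05-27", periods=8, freq="15s")
    data = pd.DataFrame(
        {"BC mass": np.random.rand(8), "cnt_total": np.ones(8)}, index=idx
    )

    dt = 60  # target resolution in seconds
    mean_part = data[["BC mass"]].fillna(0).resample(f"{dt}s").mean()
    sum_part = data[["cnt_total"]].fillna(0).resample(f"{dt}s").sum()

    # Same resampled index on both sides, so axis=1 concat lines them up.
    merged = pd.concat([mean_part, sum_part], axis=1)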
@@ -2,18 +2,15 @@
  "cells": [
   {
    "cell_type": "code",
-    "execution_count": 2,
+    "execution_count": null,
    "id": "4e2fec67-a2b4-4ca5-80e0-71ed4f54f58f",
    "metadata": {},
    "outputs": [],
    "source": [
     "import time\n",
     "import pandas as pd\n",
     "import numpy as np\n",
     "import sys\n",
     "import pickle\n",
     "\n",
-     "from SP2XR_toolkit import *"
+     "from SP2XR_toolkit import calculate_calib_coeff"
    ]
   },
   {
@@ -7,21 +7,23 @@ Created on Mon May 27 11:45:19 2024

 import time
 import pandas as pd
 import numpy as np
 import sys
 import pickle
 import dask.dataframe as dd
 import dask
 from dask.distributed import Client
 from dask_jobqueue import SLURMCluster
 import datetime
 import struct
 import zipfile
 from dask import delayed
 import itertools
 import gc

-from SP2XR_toolkit import *
+from SP2XR_toolkit import (
+    find_files,
+    read_and_process_sp2b,
+    read_csv_files_with_dask_2,
+    process_sp2b_parquet,
+    resample_to_dt,
+    chunks,
+    list_first_level_subdirs,
+    process_pbp_parquet,
+    get_file_dict,
+)


 # %% Define directories and folders
@@ -53,7 +55,7 @@ meta_file_sp2b = pd.read_parquet(

 matching_files_pbp = find_files(source_directory, filter_string_pbp)
 matching_files_hk = find_files(source_directory, filter_string_hk)
-# matching_files_sp2b = find_files(source_directory, filter_string_sp2b)[10000:50000]
+matching_files_sp2b = find_files(source_directory, filter_string_sp2b)[10000:50000]


 # %% PBP: From csv/zip to parquet