feat: adapt sp2xr_pipeline and helpers to run multiple times across the same dataset and different time slots but ensuring config settings are the same for the entire dataset
This commit is contained in:
@@ -16,6 +16,7 @@ from sp2xr.helpers import (
|
||||
get_time_chunks_from_range,
|
||||
delete_partition_if_exists,
|
||||
partition_rowcount,
|
||||
validate_config_compatibility,
|
||||
)
|
||||
from sp2xr.calibration import calibrate_single_particle
|
||||
from sp2xr.resample_pbp_hk import (
|
||||
@@ -56,13 +57,42 @@ def main():
|
||||
|
||||
signal.signal(signal.SIGTERM, handle_sigterm)
|
||||
try:
|
||||
# -1. chunking
|
||||
"""# -1. chunking
|
||||
pbp_times = extract_partitioned_datetimes(run_config["input_pbp"])
|
||||
hk_times = extract_partitioned_datetimes(run_config["input_hk"])
|
||||
global_start = min(min(pbp_times), min(hk_times))
|
||||
global_end = max(max(pbp_times), max(hk_times))
|
||||
chunk_freq = run_config["chunking"]["freq"] # e.g. "6h", "3d"
|
||||
time_chunks = get_time_chunks_from_range(global_start, global_end, chunk_freq)
|
||||
"""
|
||||
# -1. Validate config compatibility
|
||||
validate_config_compatibility(run_config)
|
||||
|
||||
# -2. chunking
|
||||
pbp_times = extract_partitioned_datetimes(run_config["input_pbp"])
|
||||
hk_times = extract_partitioned_datetimes(run_config["input_hk"])
|
||||
|
||||
# Use config date range if specified, otherwise use data extent
|
||||
if run_config["chunking"]["start_date"]:
|
||||
global_start = pd.to_datetime(run_config["chunking"]["start_date"])
|
||||
print(f"Using config start_date: {global_start}")
|
||||
else:
|
||||
global_start = min(min(pbp_times), min(hk_times))
|
||||
print(f"Using data extent start: {global_start}")
|
||||
|
||||
if run_config["chunking"]["end_date"]:
|
||||
global_end = pd.to_datetime(run_config["chunking"]["end_date"])
|
||||
print(f"Using config end_date: {global_end}")
|
||||
else:
|
||||
global_end = max(max(pbp_times), max(hk_times))
|
||||
print(f"Using data extent end: {global_end}")
|
||||
|
||||
chunk_freq = run_config["chunking"]["freq"] # e.g. "6h", "3d"
|
||||
time_chunks = get_time_chunks_from_range(global_start, global_end, chunk_freq)
|
||||
|
||||
print(
|
||||
f"Processing {len(time_chunks)} time chunks from {global_start} to {global_end}"
|
||||
)
|
||||
|
||||
# 0. calibration stage --------------------------------------------
|
||||
instr_config = yaml.safe_load(open(run_config["instr_cfg"]))
|
||||
@@ -100,7 +130,7 @@ def main():
|
||||
pbp_filters.append(("hour", ">=", chunk_start.hour))
|
||||
pbp_filters.append(("hour", "<", chunk_end.hour))
|
||||
|
||||
client.restart()
|
||||
#client.restart()
|
||||
|
||||
scattered_bins = {
|
||||
"inc_mass_bin_lims": client.scatter(inc_mass_bin_lims, broadcast=True),
|
||||
|
||||
Reference in New Issue
Block a user