diff --git a/SP2XR_toolkit.py b/SP2XR_toolkit.py index d963b28..3e19ec8 100644 --- a/SP2XR_toolkit.py +++ b/SP2XR_toolkit.py @@ -296,7 +296,7 @@ def read_csv_files_with_dask(file_path, meta_pbp, meta_hk, target_directory): # %% Functions to read sp2b files - +@delayed def read_sp2b_from_sp2xr_zipped_2(file_path, meta, target_directory): labview_epoch = datetime.datetime(1904, 1, 1) results = [] @@ -582,10 +582,14 @@ def read_and_process_sp2b(matches, target_directory, meta_file): # def name_function(part_idx): # return f"{first_elements[part_idx]}.parquet" + fn = file_path.split('\\')[-1].split('_')[-2] + '_' + file_path.split('\\')[-1].split('_')[-1].split('.')[-2] + def name(part_idx): + return f'{fn}.parquet' + combined_ddf.to_parquet(path=target_directory, engine='pyarrow', partition_on=['date', 'hour'], - name_function=name_function, + name_function=name, write_index=True, append=False, schema='infer') @@ -2069,8 +2073,8 @@ def resample_to_dt(dir_path_pbp, dt=60, path_parquet='', save_final_data=False): cols_for_sum = timelag_hist_cols + cnts_cols + addiotnal_cols cols_for_count = ['temporary_col'] - data_resampled_mean = dd_data[cols_for_mean].resample(f'{dt}s').mean() - data_resampled_sum = dd_data[cols_for_sum].resample(f'{dt}s').sum() + data_resampled_mean = dd_data[cols_for_mean].fillna(0).resample(f'{dt}s').mean() + data_resampled_sum = dd_data[cols_for_sum].fillna(0).resample(f'{dt}s').sum() data_resampled_count = dd_data[cols_for_count].resample(f'{dt}s').count()