Update the function that reads sp2b files; add fillna(0) before resampling in resample_to_dt

This commit is contained in:
2024-09-23 12:45:53 +02:00
parent a42148cc13
commit a4bbf356af

View File

@ -296,7 +296,7 @@ def read_csv_files_with_dask(file_path, meta_pbp, meta_hk, target_directory):
# %% Functions to read sp2b files
@delayed
def read_sp2b_from_sp2xr_zipped_2(file_path, meta, target_directory):
labview_epoch = datetime.datetime(1904, 1, 1)
results = []
@ -582,10 +582,14 @@ def read_and_process_sp2b(matches, target_directory, meta_file):
# def name_function(part_idx):
# return f"{first_elements[part_idx]}.parquet"
fn = file_path.split('\\')[-1].split('_')[-2] + '_' + file_path.split('\\')[-1].split('_')[-1].split('.')[-2]
def name(part_idx):
return f'{fn}.parquet'
combined_ddf.to_parquet(path=target_directory,
engine='pyarrow',
partition_on=['date', 'hour'],
name_function=name_function,
name_function=name,
write_index=True,
append=False, schema='infer')
@ -2069,8 +2073,8 @@ def resample_to_dt(dir_path_pbp, dt=60, path_parquet='', save_final_data=False):
cols_for_sum = timelag_hist_cols + cnts_cols + addiotnal_cols
cols_for_count = ['temporary_col']
data_resampled_mean = dd_data[cols_for_mean].resample(f'{dt}s').mean()
data_resampled_sum = dd_data[cols_for_sum].resample(f'{dt}s').sum()
data_resampled_mean = dd_data[cols_for_mean].fillna(0).resample(f'{dt}s').mean()
data_resampled_sum = dd_data[cols_for_sum].fillna(0).resample(f'{dt}s').sum()
data_resampled_count = dd_data[cols_for_count].resample(f'{dt}s').count()