Updated the function that reads sp2b files; added fillna(0) before resampling in the function resample_to_dt
This commit is contained in:
@ -296,7 +296,7 @@ def read_csv_files_with_dask(file_path, meta_pbp, meta_hk, target_directory):
|
|||||||
|
|
||||||
# %% Functions to read sp2b files
|
# %% Functions to read sp2b files
|
||||||
|
|
||||||
|
@delayed
|
||||||
def read_sp2b_from_sp2xr_zipped_2(file_path, meta, target_directory):
|
def read_sp2b_from_sp2xr_zipped_2(file_path, meta, target_directory):
|
||||||
labview_epoch = datetime.datetime(1904, 1, 1)
|
labview_epoch = datetime.datetime(1904, 1, 1)
|
||||||
results = []
|
results = []
|
||||||
@ -582,10 +582,14 @@ def read_and_process_sp2b(matches, target_directory, meta_file):
|
|||||||
# def name_function(part_idx):
|
# def name_function(part_idx):
|
||||||
# return f"{first_elements[part_idx]}.parquet"
|
# return f"{first_elements[part_idx]}.parquet"
|
||||||
|
|
||||||
|
fn = file_path.split('\\')[-1].split('_')[-2] + '_' + file_path.split('\\')[-1].split('_')[-1].split('.')[-2]
|
||||||
|
def name(part_idx):
|
||||||
|
return f'{fn}.parquet'
|
||||||
|
|
||||||
combined_ddf.to_parquet(path=target_directory,
|
combined_ddf.to_parquet(path=target_directory,
|
||||||
engine='pyarrow',
|
engine='pyarrow',
|
||||||
partition_on=['date', 'hour'],
|
partition_on=['date', 'hour'],
|
||||||
name_function=name_function,
|
name_function=name,
|
||||||
write_index=True,
|
write_index=True,
|
||||||
append=False, schema='infer')
|
append=False, schema='infer')
|
||||||
|
|
||||||
@ -2069,8 +2073,8 @@ def resample_to_dt(dir_path_pbp, dt=60, path_parquet='', save_final_data=False):
|
|||||||
cols_for_sum = timelag_hist_cols + cnts_cols + addiotnal_cols
|
cols_for_sum = timelag_hist_cols + cnts_cols + addiotnal_cols
|
||||||
cols_for_count = ['temporary_col']
|
cols_for_count = ['temporary_col']
|
||||||
|
|
||||||
data_resampled_mean = dd_data[cols_for_mean].resample(f'{dt}s').mean()
|
data_resampled_mean = dd_data[cols_for_mean].fillna(0).resample(f'{dt}s').mean()
|
||||||
data_resampled_sum = dd_data[cols_for_sum].resample(f'{dt}s').sum()
|
data_resampled_sum = dd_data[cols_for_sum].fillna(0).resample(f'{dt}s').sum()
|
||||||
data_resampled_count = dd_data[cols_for_count].resample(f'{dt}s').count()
|
data_resampled_count = dd_data[cols_for_count].resample(f'{dt}s').count()
|
||||||
|
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user