Added an optional output_folder_name argument to the make_file_copy function and commented out the lines that stored 'timestamps' as a separate dataset, which increased dataset storage complexity.

This commit is contained in:
2024-03-21 17:16:14 +01:00
parent 4244e39232
commit fff935f551
3 changed files with 18 additions and 20 deletions

View File

@@ -181,14 +181,14 @@ def read_txt_files_as_dict(filename : str ):
dataset['dtype'] = type(dataset['data']) dataset['dtype'] = type(dataset['data'])
file_dict['datasets'].append(dataset) file_dict['datasets'].append(dataset)
if 'timestamps' in categorical_variables: #if 'timestamps' in categorical_variables:
dataset = {} # dataset = {}
dataset['name'] = 'timestamps' # dataset['name'] = 'timestamps'
dataset['data'] = df_categorical_attrs['timestamps'].to_numpy().reshape((rows,1)) # dataset['data'] = df_categorical_attrs['timestamps'].to_numpy().reshape((rows,1))
dataset['shape'] = dataset['data'].shape # dataset['shape'] = dataset['data'].shape
dataset['dtype'] = type(dataset['data']) # dataset['dtype'] = type(dataset['data'])
file_dict['datasets'].append(dataset) # file_dict['datasets'].append(dataset)
categorical_variables.remove('timestamps') # categorical_variables.remove('timestamps')
if categorical_variables: if categorical_variables:
dataset = {} dataset = {}

View File

@@ -50,7 +50,7 @@ def split_sample_col_into_sample_and_data_quality_cols(input_data: pd.DataFrame)
return input_data return input_data
def make_file_copy(source_file_path): def make_file_copy(source_file_path, output_folder_name : str = 'tmp_files'):
pathtail, filename = os.path.split(source_file_path) pathtail, filename = os.path.split(source_file_path)
#backup_filename = 'backup_'+ filename #backup_filename = 'backup_'+ filename
@@ -58,7 +58,7 @@ def make_file_copy(source_file_path):
# Path # Path
ROOT_DIR = os.path.abspath(os.curdir) ROOT_DIR = os.path.abspath(os.curdir)
tmp_dirpath = os.path.join(ROOT_DIR,'tmp_files') tmp_dirpath = os.path.join(ROOT_DIR,output_folder_name)
if not os.path.exists(tmp_dirpath): if not os.path.exists(tmp_dirpath):
os.mkdir(tmp_dirpath) os.mkdir(tmp_dirpath)

View File

@@ -103,17 +103,15 @@ def read_txt_files_as_dict(filename : str ):
dataset['shape'] = dataset['data'].shape dataset['shape'] = dataset['data'].shape
dataset['dtype'] = type(dataset['data']) dataset['dtype'] = type(dataset['data'])
file_dict['datasets'].append(dataset) file_dict['datasets'].append(dataset)
if 'timestamps' in categorical_variables: #if 'timestamps' in categorical_variables:
dataset = {} # dataset = {}
dataset['name'] = 'timestamps' # dataset['name'] = 'timestamps'
dataset['data'] = df_categorical_attrs['timestamps'].to_numpy().reshape((rows,1)) # dataset['data'] = df_categorical_attrs['timestamps'].to_numpy().reshape((rows,1))
dataset['shape'] = dataset['data'].shape # dataset['shape'] = dataset['data'].shape
dataset['dtype'] = type(dataset['data']) # dataset['dtype'] = type(dataset['data'])
file_dict['datasets'].append(dataset) # file_dict['datasets'].append(dataset)
categorical_variables.remove('timestamps') # categorical_variables.remove('timestamps')
if categorical_variables: if categorical_variables:
dataset = {} dataset = {}