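Added an optional output_folder_name argument (default 'tmp_files') to the make_file_copy function, and commented out the lines that stored 'timestamps' as a separate dataset, since they increase dataset storage complexity.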

This commit is contained in:
2024-03-21 17:16:14 +01:00
parent 4244e39232
commit fff935f551
3 changed files with 18 additions and 20 deletions

View File

@ -181,14 +181,14 @@ def read_txt_files_as_dict(filename : str ):
dataset['dtype'] = type(dataset['data'])
file_dict['datasets'].append(dataset)
if 'timestamps' in categorical_variables:
dataset = {}
dataset['name'] = 'timestamps'
dataset['data'] = df_categorical_attrs['timestamps'].to_numpy().reshape((rows,1))
dataset['shape'] = dataset['data'].shape
dataset['dtype'] = type(dataset['data'])
file_dict['datasets'].append(dataset)
categorical_variables.remove('timestamps')
#if 'timestamps' in categorical_variables:
# dataset = {}
# dataset['name'] = 'timestamps'
# dataset['data'] = df_categorical_attrs['timestamps'].to_numpy().reshape((rows,1))
# dataset['shape'] = dataset['data'].shape
# dataset['dtype'] = type(dataset['data'])
# file_dict['datasets'].append(dataset)
# categorical_variables.remove('timestamps')
if categorical_variables:
dataset = {}

View File

@ -50,7 +50,7 @@ def split_sample_col_into_sample_and_data_quality_cols(input_data: pd.DataFrame)
return input_data
def make_file_copy(source_file_path):
def make_file_copy(source_file_path, output_folder_name : str = 'tmp_files'):
pathtail, filename = os.path.split(source_file_path)
#backup_filename = 'backup_'+ filename
@ -58,7 +58,7 @@ def make_file_copy(source_file_path):
# Path
ROOT_DIR = os.path.abspath(os.curdir)
tmp_dirpath = os.path.join(ROOT_DIR,'tmp_files')
tmp_dirpath = os.path.join(ROOT_DIR,output_folder_name)
if not os.path.exists(tmp_dirpath):
os.mkdir(tmp_dirpath)

View File

@ -103,17 +103,15 @@ def read_txt_files_as_dict(filename : str ):
dataset['shape'] = dataset['data'].shape
dataset['dtype'] = type(dataset['data'])
file_dict['datasets'].append(dataset)
if 'timestamps' in categorical_variables:
dataset = {}
dataset['name'] = 'timestamps'
dataset['data'] = df_categorical_attrs['timestamps'].to_numpy().reshape((rows,1))
dataset['shape'] = dataset['data'].shape
dataset['dtype'] = type(dataset['data'])
file_dict['datasets'].append(dataset)
categorical_variables.remove('timestamps')
#if 'timestamps' in categorical_variables:
# dataset = {}
# dataset['name'] = 'timestamps'
# dataset['data'] = df_categorical_attrs['timestamps'].to_numpy().reshape((rows,1))
# dataset['shape'] = dataset['data'].shape
# dataset['dtype'] = type(dataset['data'])
# file_dict['datasets'].append(dataset)
# categorical_variables.remove('timestamps')
if categorical_variables:
dataset = {}