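Added an optional `output_folder_name` argument (default 'tmp_files') to the make_file_copy function and commented out the lines in read_txt_files_as_dict that stored 'timestamps' as a separate dataset, since they increase dataset storage complexity.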
This commit is contained in:
@ -181,14 +181,14 @@ def read_txt_files_as_dict(filename : str ):
|
|||||||
dataset['dtype'] = type(dataset['data'])
|
dataset['dtype'] = type(dataset['data'])
|
||||||
file_dict['datasets'].append(dataset)
|
file_dict['datasets'].append(dataset)
|
||||||
|
|
||||||
if 'timestamps' in categorical_variables:
|
#if 'timestamps' in categorical_variables:
|
||||||
dataset = {}
|
# dataset = {}
|
||||||
dataset['name'] = 'timestamps'
|
# dataset['name'] = 'timestamps'
|
||||||
dataset['data'] = df_categorical_attrs['timestamps'].to_numpy().reshape((rows,1))
|
# dataset['data'] = df_categorical_attrs['timestamps'].to_numpy().reshape((rows,1))
|
||||||
dataset['shape'] = dataset['data'].shape
|
# dataset['shape'] = dataset['data'].shape
|
||||||
dataset['dtype'] = type(dataset['data'])
|
# dataset['dtype'] = type(dataset['data'])
|
||||||
file_dict['datasets'].append(dataset)
|
# file_dict['datasets'].append(dataset)
|
||||||
categorical_variables.remove('timestamps')
|
# categorical_variables.remove('timestamps')
|
||||||
|
|
||||||
if categorical_variables:
|
if categorical_variables:
|
||||||
dataset = {}
|
dataset = {}
|
||||||
|
@ -50,7 +50,7 @@ def split_sample_col_into_sample_and_data_quality_cols(input_data: pd.DataFrame)
|
|||||||
|
|
||||||
return input_data
|
return input_data
|
||||||
|
|
||||||
def make_file_copy(source_file_path):
|
def make_file_copy(source_file_path, output_folder_name : str = 'tmp_files'):
|
||||||
|
|
||||||
pathtail, filename = os.path.split(source_file_path)
|
pathtail, filename = os.path.split(source_file_path)
|
||||||
#backup_filename = 'backup_'+ filename
|
#backup_filename = 'backup_'+ filename
|
||||||
@ -58,7 +58,7 @@ def make_file_copy(source_file_path):
|
|||||||
# Path
|
# Path
|
||||||
ROOT_DIR = os.path.abspath(os.curdir)
|
ROOT_DIR = os.path.abspath(os.curdir)
|
||||||
|
|
||||||
tmp_dirpath = os.path.join(ROOT_DIR,'tmp_files')
|
tmp_dirpath = os.path.join(ROOT_DIR,output_folder_name)
|
||||||
if not os.path.exists(tmp_dirpath):
|
if not os.path.exists(tmp_dirpath):
|
||||||
os.mkdir(tmp_dirpath)
|
os.mkdir(tmp_dirpath)
|
||||||
|
|
||||||
|
@ -104,16 +104,14 @@ def read_txt_files_as_dict(filename : str ):
|
|||||||
dataset['dtype'] = type(dataset['data'])
|
dataset['dtype'] = type(dataset['data'])
|
||||||
file_dict['datasets'].append(dataset)
|
file_dict['datasets'].append(dataset)
|
||||||
|
|
||||||
|
#if 'timestamps' in categorical_variables:
|
||||||
|
# dataset = {}
|
||||||
if 'timestamps' in categorical_variables:
|
# dataset['name'] = 'timestamps'
|
||||||
dataset = {}
|
# dataset['data'] = df_categorical_attrs['timestamps'].to_numpy().reshape((rows,1))
|
||||||
dataset['name'] = 'timestamps'
|
# dataset['shape'] = dataset['data'].shape
|
||||||
dataset['data'] = df_categorical_attrs['timestamps'].to_numpy().reshape((rows,1))
|
# dataset['dtype'] = type(dataset['data'])
|
||||||
dataset['shape'] = dataset['data'].shape
|
# file_dict['datasets'].append(dataset)
|
||||||
dataset['dtype'] = type(dataset['data'])
|
# categorical_variables.remove('timestamps')
|
||||||
file_dict['datasets'].append(dataset)
|
|
||||||
categorical_variables.remove('timestamps')
|
|
||||||
|
|
||||||
if categorical_variables:
|
if categorical_variables:
|
||||||
dataset = {}
|
dataset = {}
|
||||||
|
Reference in New Issue
Block a user