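Added an optional `output_folder_name` argument (default 'tmp_files') to the make_file_copy function, and commented out the lines in both file readers that stored 'timestamps' as a separate dataset, since they increase dataset storage complexity.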

2024-03-21 17:16:14 +01:00
parent 4244e39232
commit fff935f551
3 changed files with 18 additions and 20 deletions
--- a/src/g5505_file_reader.py
+++ b/src/g5505_file_reader.py
@ -181,14 +181,14 @@ def read_txt_files_as_dict(filename : str ):
            dataset['dtype'] = type(dataset['data'])
            file_dict['datasets'].append(dataset)            
-        if 'timestamps' in categorical_variables:
+        #if 'timestamps' in categorical_variables:
-            dataset = {}
+        #    dataset = {}
-            dataset['name'] = 'timestamps'
+        #    dataset['name'] = 'timestamps'
-            dataset['data'] = df_categorical_attrs['timestamps'].to_numpy().reshape((rows,1))
+        #    dataset['data'] = df_categorical_attrs['timestamps'].to_numpy().reshape((rows,1))
-            dataset['shape'] = dataset['data'].shape
+        #    dataset['shape'] = dataset['data'].shape
-            dataset['dtype'] = type(dataset['data'])
+        #    dataset['dtype'] = type(dataset['data'])
-            file_dict['datasets'].append(dataset)            
+        #    file_dict['datasets'].append(dataset)            
-            categorical_variables.remove('timestamps')
+        #    categorical_variables.remove('timestamps')
        if categorical_variables:
            dataset = {}
--- a/src/g5505_utils.py
+++ b/src/g5505_utils.py
@ -50,7 +50,7 @@ def split_sample_col_into_sample_and_data_quality_cols(input_data: pd.DataFrame)
    return input_data
-def make_file_copy(source_file_path):
+def make_file_copy(source_file_path, output_folder_name : str = 'tmp_files'):
    pathtail, filename = os.path.split(source_file_path)
    #backup_filename = 'backup_'+ filename
@ -58,7 +58,7 @@ def make_file_copy(source_file_path):
    # Path                     
    ROOT_DIR = os.path.abspath(os.curdir)
-    tmp_dirpath = os.path.join(ROOT_DIR,'tmp_files')
+    tmp_dirpath = os.path.join(ROOT_DIR,output_folder_name)
    if not os.path.exists(tmp_dirpath):
        os.mkdir(tmp_dirpath)
--- a/src/smog_chamber_file_reader.py
+++ b/src/smog_chamber_file_reader.py
@ -103,17 +103,15 @@ def read_txt_files_as_dict(filename : str ):
        dataset['shape'] = dataset['data'].shape
        dataset['dtype'] = type(dataset['data'])
        file_dict['datasets'].append(dataset) 
-    if 'timestamps' in categorical_variables:
+    #if 'timestamps' in categorical_variables:
-        dataset = {}
+    #    dataset = {}
-        dataset['name'] = 'timestamps'
+    #    dataset['name'] = 'timestamps'
-        dataset['data'] = df_categorical_attrs['timestamps'].to_numpy().reshape((rows,1))
+    #    dataset['data'] = df_categorical_attrs['timestamps'].to_numpy().reshape((rows,1))
-        dataset['shape'] = dataset['data'].shape
+    #    dataset['shape'] = dataset['data'].shape
-        dataset['dtype'] = type(dataset['data'])
+    #    dataset['dtype'] = type(dataset['data'])
-        file_dict['datasets'].append(dataset)            
+    #    file_dict['datasets'].append(dataset)            
-        categorical_variables.remove('timestamps')
+    #    categorical_variables.remove('timestamps')
    if categorical_variables:
        dataset = {}