From d62327ba25da9779f935ff3755a63686ef4ca9f8 Mon Sep 17 00:00:00 2001
From: Florez Ospina Juan Felipe <juan.florez-ospina@psi.ch>
Date: Tue, 13 Feb 2024 16:39:05 +0100
Subject: [PATCH] Created g5505_utils.py module with very specific helper
 functions of the 5505 group. This is to remove clutter from the main module
 hdf5_lib.py

---
 g5505_utils.py | 51 ++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 51 insertions(+)
 create mode 100644 g5505_utils.py

diff --git a/g5505_utils.py b/g5505_utils.py
new file mode 100644
index 0000000..103f529
--- /dev/null
+++ b/g5505_utils.py
@@ -0,0 +1,51 @@
+import pandas as pd
+import os
+
+
+def is_callable_list(x: list):  # True when every item of x is callable (also for empty x)
+    return all(callable(entry) for entry in x)
+
+def is_str_list(x: list):  # True when every item of x is a str (also for empty x)
+    return all(isinstance(entry, str) for entry in x)
+
+def augment_with_filetype(df):
+    """Set df['filetype'] to each filename's extension (no dot); return df."""
+    df['filetype'] = [os.path.splitext(name)[1][1:] for name in df['filename']]
+    return df
+
+def augment_with_filenumber(df):
+    """Set df['filenumber'] to the filename part before '_' (all of it if no '_'); return df."""
+    df['filenumber'] = [name.partition('_')[0] for name in df['filename']]
+    return df
+
+def group_by_df_column(df, column_name: str):
+    """Return df[column_name], the column by which grouping will take place.
+
+    df (pandas.DataFrame): table that must contain the requested column.
+    column_name (str): name of the column to return; a ValueError is
+        raised when it is not one of df.columns.
+    """
+    if column_name in df.columns:
+        return df[column_name]
+    raise ValueError("column_name must be in the columns of df.")
+
+def split_sample_col_into_sample_and_data_quality_cols(input_data: pd.DataFrame):
+    """Split 'sample' into sample-name and 'data_quality' columns, in place.
+
+    'name(quality)' splits at the first '(' (last character dropped); '' maps
+    to 'Not yet annotated'/'unevaluated'; other entries get 'good data'.
+    """
+    names, qualities = [], []
+    for entry in input_data['sample']:
+        paren_at = entry.find('(')
+        if paren_at != -1:
+            name, quality = entry[:paren_at], entry[paren_at + 1:-1]
+        elif entry == '':
+            name, quality = 'Not yet annotated', 'unevaluated'
+        else:
+            name, quality = entry, 'good data'
+        names.append(name)
+        qualities.append(quality)
+    input_data['sample'] = names
+    input_data['data_quality'] = qualities
+    return input_data