"""Helper functions specific to the 5505 group.

These helpers live in their own module (g5505_utils.py) to remove clutter
from the main module hdf5_lib.py.
"""
import os

import pandas as pd


def is_callable_list(x: list) -> bool:
    """Return True if every item of ``x`` is callable (True for an empty list)."""
    return all(callable(item) for item in x)


def is_str_list(x: list) -> bool:
    """Return True if every item of ``x`` is a ``str`` (True for an empty list)."""
    return all(isinstance(item, str) for item in x)


def augment_with_filetype(df):
    """Add a 'filetype' column holding each filename's extension.

    Args:
        df (pandas.DataFrame): must contain a 'filename' column of strings.

    Returns:
        pandas.DataFrame: the same ``df`` object, modified in place. The new
        'filetype' column holds the last extension without its leading dot
        ('' when the filename has no extension).
    """
    df['filetype'] = [os.path.splitext(name)[1][1:] for name in df['filename']]
    return df


def augment_with_filenumber(df):
    """Add a 'filenumber' column holding the text before the first '_'.

    Args:
        df (pandas.DataFrame): must contain a 'filename' column of strings.

    Returns:
        pandas.DataFrame: the same ``df`` object, modified in place. The new
        'filenumber' column holds the part of each filename that precedes the
        first underscore, or '' when the filename contains no underscore.
    """
    # str.find returns -1 when '_' is absent, and slicing with [0:-1] would
    # silently drop the filename's last character; test for '_' explicitly.
    df['filenumber'] = [
        name.partition('_')[0] if '_' in name else ''
        for name in df['filename']
    ]
    return df


def group_by_df_column(df, column_name: str):
    """Return the column of ``df`` named ``column_name``.

    No grouping is performed here: the function only validates the column
    name and returns the column itself.

    Args:
        df (pandas.DataFrame): table to read the column from.
        column_name (str): name of the column of ``df`` to return.

    Returns:
        pandas.Series: ``df[column_name]``.

    Raises:
        ValueError: if ``column_name`` is not one of ``df.columns``.
    """
    if column_name not in df.columns:
        raise ValueError("column_name must be in the columns of df.")

    return df[column_name]


def _is_missing(value) -> bool:
    """Return True for scalar missing-value markers such as None or NaN."""
    return pd.api.types.is_scalar(value) and bool(pd.isna(value))


def split_sample_col_into_sample_and_data_quality_cols(input_data: pd.DataFrame):
    """Split 'sample' entries of the form 'name(quality)' into two columns.

    Each entry of ``input_data['sample']`` is mapped as follows:

    * 'name(quality)' -> sample 'name', data_quality 'quality'
    * '' or a missing value (None/NaN) -> sample 'Not yet annotated',
      data_quality 'unevaluated'
    * any other string -> sample unchanged, data_quality 'good data'

    Args:
        input_data (pandas.DataFrame): must contain a 'sample' column.

    Returns:
        pandas.DataFrame: the same ``input_data`` object, modified in place:
        'sample' is overwritten with the extracted names and a 'data_quality'
        column is added.
    """
    sample_name = []
    sample_quality = []
    for item in input_data['sample']:
        # Missing cells have no .find(); treat them like the empty string.
        if _is_missing(item) or item == '':
            sample_name.append('Not yet annotated')
            sample_quality.append('unevaluated')
            continue

        open_idx = item.find('(')
        if open_idx == -1:
            sample_name.append(item)
            sample_quality.append('good data')
            continue

        # Cut the quality text at the last ')' only if one follows the '(';
        # otherwise keep everything after '(' instead of blindly dropping the
        # final character.
        close_idx = item.rfind(')')
        end = close_idx if close_idx > open_idx else len(item)
        # The name is kept exactly as written, including any whitespace
        # that precedes the '('.
        sample_name.append(item[:open_idx])
        sample_quality.append(item[open_idx + 1:end])

    input_data['sample'] = sample_name
    input_data['data_quality'] = sample_quality

    return input_data