Created the g5505_utils.py module, which holds helper functions specific to the 5505 group. This removes clutter from the main module, hdf5_lib.py.
This commit is contained in:
51
g5505_utils.py
Normal file
51
g5505_utils.py
Normal file
@ -0,0 +1,51 @@
|
|||||||
|
import pandas as pd
|
||||||
|
import os
|
||||||
|
|
||||||
|
|
||||||
|
def is_callable_list(x: list) -> bool:
    """Return True if every element of ``x`` is callable.

    Parameters
    ----------
    x : list
        Collection of objects to check.

    Returns
    -------
    bool
        True when all elements are callable (also True for an empty list),
        False as soon as a non-callable element is found.
    """
    # A generator lets all() stop at the first non-callable element instead
    # of building an intermediate list first.
    return all(callable(item) for item in x)
|
||||||
|
|
||||||
|
def is_str_list(x: list) -> bool:
    """Return True if every element of ``x`` is a ``str`` instance.

    Parameters
    ----------
    x : list
        Collection of objects to check.

    Returns
    -------
    bool
        True when all elements are strings (also True for an empty list),
        False as soon as a non-string element is found.
    """
    # A generator lets all() stop at the first non-string element instead
    # of building an intermediate list first.
    return all(isinstance(item, str) for item in x)
|
||||||
|
|
||||||
|
def augment_with_filetype(df: pd.DataFrame) -> pd.DataFrame:
    """Add a ``'filetype'`` column holding each file's extension.

    The extension is taken from the ``'filename'`` column with
    ``os.path.splitext`` and stored without its leading dot; names that
    have no extension get an empty string.

    Parameters
    ----------
    df : pandas.DataFrame
        Table with a ``'filename'`` column of strings.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, modified in place with the new column.
    """
    # splitext returns (root, '.ext'); [1:] drops the leading dot.
    df['filetype'] = [os.path.splitext(name)[1][1:] for name in df['filename']]
    return df
|
||||||
|
|
||||||
|
def augment_with_filenumber(df: pd.DataFrame) -> pd.DataFrame:
    """Add a ``'filenumber'`` column holding the prefix before the first ``_``.

    For a filename such as ``'0012_sample.txt'`` the file number is
    ``'0012'``. When a filename contains no underscore the whole filename
    is used as its file number.

    Parameters
    ----------
    df : pandas.DataFrame
        Table with a ``'filename'`` column of strings.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, modified in place with the new column.
    """
    # str.partition returns the full string as the head when '_' is absent.
    # Slicing with str.find would use -1 as the end index in that case and
    # silently drop the last character of the filename.
    df['filenumber'] = [name.partition('_')[0] for name in df['filename']]
    return df
|
||||||
|
|
||||||
|
def group_by_df_column(df, column_name: str):
    """Return the column of ``df`` selected by ``column_name``.

    Note that this function performs no grouping itself: it validates that
    the column exists and returns it.
    NOTE(review): presumably the returned column is used by the caller as
    the grouping key -- confirm against the call sites.

    Parameters
    ----------
    df : pandas.DataFrame
        Table to select the column from.
    column_name : str
        Name of the column of ``df`` by which the grouping operation will
        take place.

    Returns
    -------
    The result of ``df[column_name]``.

    Raises
    ------
    ValueError
        If ``column_name`` is not one of the columns of ``df``.
    """
    if column_name not in df.columns:
        raise ValueError("column_name must be in the columns of df.")

    return df[column_name]
|
||||||
|
|
||||||
|
def split_sample_col_into_sample_and_data_quality_cols(input_data: pd.DataFrame):
    """Split the ``'sample'`` column into sample name and data quality.

    Each entry of ``input_data['sample']`` is interpreted as follows:

    * ``'name(quality)'`` -> sample ``'name'``, data quality ``'quality'``
      (the text between the first ``'('`` and the last ``')'``; if the
      closing parenthesis is missing, everything after ``'('`` is used).
    * ``''`` -> sample ``'Not yet annotated'``, data quality
      ``'unevaluated'``.
    * any other string -> sample unchanged, data quality ``'good data'``.

    Parameters
    ----------
    input_data : pandas.DataFrame
        Table with a ``'sample'`` column of strings.

    Returns
    -------
    pandas.DataFrame
        The same ``input_data`` object, modified in place: ``'sample'`` is
        overwritten with the extracted names and a ``'data_quality'``
        column is added.
    """
    sample_names = []
    data_qualities = []

    for item in input_data['sample']:
        name, open_paren, remainder = item.partition('(')
        if open_paren:
            # Cut the quality text at the last ')' when there is one.
            # Blindly dropping the final character would corrupt entries
            # with a missing ')' or with trailing characters after it.
            close_idx = remainder.rfind(')')
            quality = remainder if close_idx == -1 else remainder[:close_idx]
        elif item == '':
            name = 'Not yet annotated'
            quality = 'unevaluated'
        else:
            # No annotation in parentheses: treated as good data.
            quality = 'good data'
        sample_names.append(name)
        data_qualities.append(quality)

    input_data['sample'] = sample_names
    input_data['data_quality'] = data_qualities

    return input_data
|
Reference in New Issue
Block a user