Files
dima/src/g5505_utils.py

68 lines
2.1 KiB
Python

import pandas as pd
import os
import shutil
def is_callable_list(x : list):
return all([callable(item) for item in x])
def is_str_list(x : list):
return all([isinstance(item,str) for item in x])
def augment_with_filetype(df):
df['filetype'] = [os.path.splitext(item)[1][1::] for item in df['filename']]
#return [os.path.splitext(item)[1][1::] for item in df['filename']]
return df
def augment_with_filenumber(df):
df['filenumber'] = [item[0:item.find('_')] for item in df['filename']]
#return [item[0:item.find('_')] for item in df['filename']]
return df
def group_by_df_column(df, column_name: str):
"""
df (pandas.DataFrame):
column_name (str): column_name of df by which grouping operation will take place.
"""
if not column_name in df.columns:
raise ValueError("column_name must be in the columns of df.")
return df[column_name]
def split_sample_col_into_sample_and_data_quality_cols(input_data: pd.DataFrame):
sample_name = []
sample_quality = []
for item in input_data['sample']:
if item.find('(')!=-1:
#print(item)
sample_name.append(item[0:item.find('(')])
sample_quality.append(item[item.find('(')+1:len(item)-1])
else:
if item=='':
sample_name.append('Not yet annotated')
sample_quality.append('unevaluated')
else:
sample_name.append(item)
sample_quality.append('good data')
input_data['sample'] = sample_name
input_data['data_quality'] = sample_quality
return input_data
def make_file_copy(source_file_path, output_folder_name : str = 'tmp_files'):
pathtail, filename = os.path.split(source_file_path)
#backup_filename = 'backup_'+ filename
backup_filename = filename
# Path
ROOT_DIR = os.path.abspath(os.curdir)
tmp_dirpath = os.path.join(ROOT_DIR,output_folder_name)
if not os.path.exists(tmp_dirpath):
os.mkdir(tmp_dirpath)
tmp_file_path = os.path.join(tmp_dirpath,backup_filename)
shutil.copy(source_file_path, tmp_file_path)
return tmp_file_path