68 lines
2.1 KiB
Python
68 lines
2.1 KiB
Python
import pandas as pd
|
|
import os
|
|
import shutil
|
|
|
|
def is_callable_list(x: list):
    """Return True when every element of ``x`` is callable.

    An empty ``x`` yields True, since there is no element that fails
    the check.
    """
    return all(map(callable, x))
|
|
|
|
def is_str_list(x: list):
    """Return True when every element of ``x`` is a ``str`` instance.

    An empty ``x`` yields True, since there is no element that fails
    the check.
    """
    for element in x:
        if not isinstance(element, str):
            return False
    return True
|
|
|
|
def augment_with_filetype(df):
    """Add a ``'filetype'`` column derived from the ``'filename'`` column.

    For each entry of ``df['filename']`` the extension reported by
    ``os.path.splitext`` is stored without its leading dot; names with no
    extension get an empty string.

    The column is written onto ``df`` itself, and that same object is
    returned.
    """
    extensions = []
    for name in df['filename']:
        _, dotted_ext = os.path.splitext(name)
        # Drop the leading '.'; an empty extension stays empty.
        extensions.append(dotted_ext[1:])
    df['filetype'] = extensions
    return df
|
|
|
|
def augment_with_filenumber(df):
    """Add a ``'filenumber'`` column derived from the ``'filename'`` column.

    The file number is the part of each filename that precedes the first
    underscore (``'001_scan.txt'`` -> ``'001'``).

    A filename that contains no underscore is kept whole. The previous
    slicing on ``str.find`` used the ``-1`` "not found" result as a slice
    end and therefore silently chopped off the last character.

    The column is written onto ``df`` itself, and that same object is
    returned.
    """
    # partition() returns the full string as the head when '_' is absent.
    df['filenumber'] = [name.partition('_')[0] for name in df['filename']]
    return df
|
|
|
|
def group_by_df_column(df, column_name: str):
    """Return the column of ``df`` labelled ``column_name``.

    Despite the function name, no grouping is performed here: the column
    is selected with ``df[column_name]`` and returned as-is.

    Args:
        df (pandas.DataFrame): table to read the column from.
        column_name (str): label of the column to return.

    Raises:
        ValueError: if ``column_name`` is not among ``df.columns``.
    """
    if column_name in df.columns:
        return df[column_name]
    raise ValueError("column_name must be in the columns of df.")
|
|
|
|
def split_sample_col_into_sample_and_data_quality_cols(input_data: pd.DataFrame):
    """Split ``input_data['sample']`` into a name and a data-quality label.

    Each entry is handled by one of three rules:

    * It contains ``'('``: the text before the first ``'('`` becomes the
      sample name, and the text after it, minus the final character of
      the entry, becomes the data quality.
    * It is the empty string: the name is ``'Not yet annotated'`` and the
      quality is ``'unevaluated'``.
    * Anything else: the entry is kept as the name and the quality is
      ``'good data'``.

    The ``'sample'`` column is overwritten and a ``'data_quality'`` column
    is written on ``input_data`` itself, which is then returned.
    """
    names = []
    qualities = []
    for entry in input_data['sample']:
        head, paren, tail = entry.partition('(')
        if paren:
            names.append(head)
            # Drop the entry's last character (the closing bracket in
            # entries shaped like "name(quality)").
            qualities.append(tail[:-1])
        elif entry == '':
            names.append('Not yet annotated')
            qualities.append('unevaluated')
        else:
            names.append(entry)
            qualities.append('good data')

    input_data['sample'] = names
    input_data['data_quality'] = qualities
    return input_data
|
|
|
|
def make_file_copy(source_file_path, output_folder_name: str = 'tmp_files'):
    """Copy a file into a folder under the current working directory.

    The destination folder is ``output_folder_name`` joined onto the
    absolute path of the current working directory. It is created when
    missing, including any intermediate directories. The copy keeps the
    source file's name.

    Args:
        source_file_path: path of the file to copy.
        output_folder_name (str): destination folder, relative to the
            current working directory. Defaults to ``'tmp_files'``.

    Returns:
        The path of the copied file.
    """
    backup_filename = os.path.basename(source_file_path)

    root_dir = os.path.abspath(os.curdir)
    tmp_dirpath = os.path.join(root_dir, output_folder_name)
    # exist_ok avoids the check-then-create race; makedirs also supports
    # nested folder names such as 'a/b', which os.mkdir rejects.
    os.makedirs(tmp_dirpath, exist_ok=True)

    tmp_file_path = os.path.join(tmp_dirpath, backup_filename)
    shutil.copy(source_file_path, tmp_file_path)

    return tmp_file_path