database/targettherapy_database_sort.py

27 lines
908 B
Python
Raw Normal View History

2023-07-18 15:29:56 +08:00
#!/usr/bin/python
#-*-coding:UTF-8-*-
import sys
import pandas as pd
from pandas import Series,DataFrame
# Ranking of Evidence_Source_C values used as the last sort key: a value's
# rank is its index in this list, so earlier entries sort first. Values that
# are not listed get no rank and sort after all ranked rows of the same
# gene/position.
_EVIDENCE_SOURCE_ORDER = [
    'FDA', 'NMPA', 'NCCN', '临床III期', '临床II期', '临床I期', '临床试验',
    '回顾性研究', '个案', '临床前研究',
]

# Position assigned to rows whose Molecular_Profile contains no digits.
# NOTE(review): rows with a real position above this value will sort after
# the digit-less rows; kept at 10000 to preserve the existing ordering.
_NO_POSITION = 10000


def sort_database(data):
    """Return the rows of *data* in database sort order.

    Rows are ordered ascending by:
      1. ``Gene_Symbol``;
      2. the first run of digits found in ``Molecular_Profile`` (rows
         without any digits use ``_NO_POSITION``);
      3. the rank of ``Evidence_Source_C`` in ``_EVIDENCE_SOURCE_ORDER``
         (unlisted values last).

    The sort keys are built in a separate frame, so the input is not
    modified and input columns that happen to be called ``pos`` or
    ``level`` are neither overwritten nor dropped.

    Args:
        data: DataFrame with ``Gene_Symbol``, ``Molecular_Profile`` and
            ``Evidence_Source_C`` columns.

    Returns:
        A new DataFrame with the same columns as *data*, rows reordered.

    Raises:
        KeyError: if one of the three required columns is missing.
    """
    # expand=False yields a Series (one capture group) instead of a
    # one-column DataFrame; the raw string avoids the invalid "\d" escape.
    digits = data['Molecular_Profile'].str.extract(r'(\d+)', expand=False)
    # Fill with the sentinel as text so the column stays homogeneous, then
    # convert everything to integers in one step.
    position = digits.fillna(str(_NO_POSITION)).astype(int)

    rank_of = {source: rank for rank, source in enumerate(_EVIDENCE_SOURCE_ORDER)}
    level = data['Evidence_Source_C'].map(rank_of)

    # to_numpy() discards the input index so the key frame gets a fresh
    # 0..n-1 index; after sorting, that index is the positional row order.
    keys = pd.DataFrame({
        'gene': data['Gene_Symbol'].to_numpy(),
        'pos': position.to_numpy(),
        'level': level.to_numpy(),
    })
    order = keys.sort_values(by=['gene', 'pos', 'level'], ascending=True).index
    return data.iloc[order.to_numpy()]


def main(argv):
    """Sort the tab-separated database named by argv[1] into argv[2].

    Args:
        argv: command-line vector: script name, input path, output path.

    Returns:
        0 on success, 1 when the argument count is wrong.
    """
    if len(argv) != 3:
        # Show the real script name and report the usage error on stderr.
        sys.stderr.write(
            "usage:python {} unsorted_database sorted_database\n".format(argv[0])
        )
        return 1
    data = pd.read_csv(argv[1], sep='\t')
    sort_database(data).to_csv(argv[2], index=False, sep='\t')
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))