27 lines
908 B
Python
27 lines
908 B
Python
|
|
#!/usr/bin/python
|
||
|
|
#-*-coding:UTF-8-*-
|
||
|
|
|
||
|
|
import sys
|
||
|
|
import pandas as pd
|
||
|
|
from pandas import Series,DataFrame
|
||
|
|
|
||
|
|
# Order in which evidence sources appear within one gene/position group:
# a source's index in this list is its sort key (earlier entries sort first).
# The Chinese labels read: clinical phase III, phase II, phase I,
# clinical trial, retrospective study, case report, preclinical study.
EVIDENCE_SOURCE_ORDER = [
    'FDA', 'NMPA', 'NCCN', '临床III期', '临床II期', '临床I期', '临床试验',
    '回顾性研究', '个案', '临床前研究',
]

# Position assigned to rows whose Molecular_Profile contains no digits.
# NOTE(review): such rows sort after every position below 10000, but before
# any real position above 10000 -- confirm that this is acceptable.
NO_POSITION_SENTINEL = 10000


def sort_database(data):
    """Return the rows of *data* sorted by gene, position and evidence level.

    The sort keys are, in order:

    1. the ``Gene_Symbol`` column;
    2. the first run of digits found in ``Molecular_Profile`` (rows without
       digits use ``NO_POSITION_SENTINEL``);
    3. the index of ``Evidence_Source_C`` in ``EVIDENCE_SOURCE_ORDER`` (values
       not in the list have no rank and are placed last).

    The keys are held in a separate table, so *data* is not modified and
    existing columns that happen to be called ``pos`` or ``level`` survive.

    Raises ``KeyError`` if one of the three columns above is missing.
    """
    # expand=False yields a Series (one capture group) instead of a DataFrame.
    digits = data['Molecular_Profile'].str.extract(r'(\d+)', expand=False)
    positions = (
        pd.to_numeric(digits).fillna(NO_POSITION_SENTINEL).astype(int)
    )

    rank_by_source = dict(
        (source, rank) for rank, source in enumerate(EVIDENCE_SOURCE_ORDER)
    )
    levels = data['Evidence_Source_C'].map(rank_by_source)

    # Plain arrays drop the original index, so the key table gets a fresh
    # 0..n-1 index and its sorted index is a positional row order for *data*.
    keys = pd.DataFrame({
        'gene': data['Gene_Symbol'].values,
        'pos': positions.values,
        'level': levels.values,
    })
    order = keys.sort_values(by=['gene', 'pos', 'level'], ascending=True).index
    return data.iloc[order.values]


def main(argv):
    """Sort the tab-separated table ``argv[1]`` and write it to ``argv[2]``.

    Returns the process exit status: 0 on success, 1 when the number of
    command-line arguments is wrong (a usage line is written to stderr).
    """
    if len(argv) != 3:
        sys.stderr.write(
            "usage: python {0} unsorted_database sorted_database\n".format(
                argv[0]
            )
        )
        return 1

    data = pd.read_csv(argv[1], sep='\t')
    sort_database(data).to_csv(argv[2], index=False, sep='\t')
    return 0


if __name__ == '__main__':
    sys.exit(main(sys.argv))
|