#!/usr/bin/python
# -*- coding: UTF-8 -*-
"""Sort a tab-separated evidence database.

Rows are ordered by ``Gene_Symbol``, then by the first run of digits found
in ``Molecular_Profile``, then by the rank of ``Evidence_Source_C`` in
``EVIDENCE_SOURCE_ORDER``.

Usage:
    python <this_script> unsorted_database sorted_database
"""
import sys

import pandas as pd
from pandas import Series, DataFrame

# Sort position given to rows whose Molecular_Profile contains no digits.
MISSING_POSITION = 10000

# Evidence sources in sort order; the list index is the sort rank.
# Sources that are not listed get a missing rank and therefore sort last
# within their (gene, position) group.
EVIDENCE_SOURCE_ORDER = [
    'FDA',
    'NMPA',
    'NCCN',
    '临床III期',
    '临床II期',
    '临床I期',
    '临床试验',
    '回顾性研究',
    '个案',
    '临床前研究',
]


def sort_database(data):
    """Return the rows of *data* in database order.

    Args:
        data: DataFrame with ``Gene_Symbol``, ``Molecular_Profile`` and
            ``Evidence_Source_C`` columns.

    Returns:
        A new DataFrame with the same columns as *data*, sorted ascending
        by gene symbol, numeric position and evidence-source rank.  The
        input frame is not modified.

    Raises:
        KeyError: if one of the required columns is absent.
    """
    # First run of digits in the profile, as a Series of strings (NaN when
    # the profile has no digits).
    position = data['Molecular_Profile'].str.extract(r'(\d+)', expand=False)
    # Fill with the sentinel *as text* so the column stays homogeneous
    # before the integer conversion.
    position = position.fillna(str(MISSING_POSITION)).astype(int)

    rank_by_source = {
        source: rank for rank, source in enumerate(EVIDENCE_SOURCE_ORDER)
    }
    level = data['Evidence_Source_C'].map(rank_by_source)

    # Sort a separate key frame rather than adding helper columns to the
    # data, so input columns that happen to be called 'pos' or 'level' are
    # neither overwritten nor dropped.  The key frame has a fresh 0..n-1
    # index, so its sorted index is the positional row order.
    keys = pd.DataFrame({
        'gene': data['Gene_Symbol'].to_numpy(),
        'pos': position.to_numpy(),
        'level': level.to_numpy(),
    })
    order = keys.sort_values(
        by=['gene', 'pos', 'level'], ascending=True
    ).index.to_numpy()
    return data.iloc[order]


def main(argv=None):
    """Command-line entry point: read a TSV, sort it, write a TSV.

    Args:
        argv: full argument vector (program name first); defaults to
            ``sys.argv``.
    """
    if argv is None:
        argv = sys.argv
    if len(argv) != 3:
        # sys.exit with a string prints it to stderr and exits with
        # status 1, so a usage error is visible to the calling process.
        sys.exit(
            "usage: python {} unsorted_database sorted_database".format(
                argv[0]
            )
        )

    data = pd.read_csv(argv[1], sep='\t')
    sort_database(data).to_csv(argv[2], index=False, sep='\t')


if __name__ == "__main__":
    main()