#!/usr/bin/python3
# -*- coding: UTF-8 -*-
"""De-duplicate the annotated snvindel / germline / fusion / cnv tables.

Usage: python3 <script> output_dir name

For each table the rows are sorted by the alteration column, then by the
label column, then by evidence source.  A row is removed when its response
is "sensitive" (contains '敏感'), its label is not '适应症', and an earlier
sensitive row labelled '适应症' exists for the same alteration and drug.
A constant column '可信' (value 1) is prepended and the result is written
next to the input as ``*.dedup.txt``.
"""
import os
import re
import sys

import pandas as pd
from pandas import Series, DataFrame  # existing file-level imports, kept as-is

# Sort order for Evidence_Source_C: earlier entries sort first.
EVIDENCE_SOURCE_ORDER = [
    'FDA', 'NMPA', 'NCCN', '临床III期', '临床II期', '临床I期',
    '临床试验', '回顾性研究', '个案', '临床前研究',
]
# Sort order for the label column ('标签'): earlier entries sort first.
LABEL_ORDER = ['适应症', '非适应症', '.']

# value -> rank lookups; values absent from the lists map to NaN and therefore
# sort last (pandas' default na_position).
_EVIDENCE_RANK = {source: rank for rank, source in enumerate(EVIDENCE_SOURCE_ORDER)}
_LABEL_RANK = {label: rank for rank, label in enumerate(LABEL_ORDER)}

# Compiled once instead of on every row.
_SENSITIVE = re.compile(r'敏感')


def _create_empty(path: str) -> None:
    """Create (or truncate) *path* and close the handle immediately."""
    with open(path, "w"):
        pass


def _dedup_table(data: pd.DataFrame, key_column: str) -> pd.DataFrame:
    """Return the sorted, de-duplicated copy of *data* with '可信' prepended.

    *key_column* names the column identifying the alteration
    ('AAChange.refGene', 'FUSION' or 'Gene_Symbol').  The input frame is not
    modified.
    """
    ranked = data.assign(
        level1=data['Evidence_Source_C'].map(_EVIDENCE_RANK),
        level2=data['标签'].map(_LABEL_RANK),
    )
    ranked = ranked.sort_values(by=[key_column, 'level2', 'level1'], ascending=True)
    ranked = ranked.drop(['level1', 'level2'], axis=1)

    # NOTE(review): SOURCE's indentation was lost; the `else` branch is taken
    # to pair with the label check (not with the "sensitive" check) - confirm.
    on_label_pairs = set()
    redundant = []
    columns = zip(
        ranked.index,
        ranked['Response_Type_C'],
        ranked['标签'],
        ranked[key_column],
        ranked['Drug'],
    )
    for index, response, label, alteration, drug in columns:
        # A missing response is not a string; such rows are kept untouched
        # instead of making re.search raise TypeError.
        if not (isinstance(response, str) and _SENSITIVE.search(response)):
            continue
        # Tuple key: unlike string concatenation, ('AB', 'C') and ('A', 'BC')
        # can never be confused.
        pair = (alteration, drug)
        if label == '适应症':
            on_label_pairs.add(pair)
        elif pair in on_label_pairs:
            redundant.append(index)

    # Drop once after the scan rather than mutating the frame while iterating.
    result = ranked.drop(redundant)
    result.insert(0, '可信', 1)
    return result


def _process_file(input_path: str, output_path: str, key_column: str) -> None:
    """De-duplicate *input_path* into *output_path*; skip zero-byte inputs.

    A missing input file still raises (from ``os.path.getsize``), as before.
    """
    if os.path.getsize(input_path) == 0:
        return
    data = pd.read_table(input_path, sep="\t")
    _dedup_table(data, key_column).to_csv(output_path, index=False, sep='\t')


def main(argv=None):
    """Run the de-duplication for one sample.

    *argv* has the shape of ``sys.argv`` (program name, output_dir, name) and
    defaults to ``sys.argv``.  Exits with status 1 after printing the usage
    line when the argument count is wrong.
    """
    argv = sys.argv if argv is None else argv
    if len(argv) != 3:
        print(" ".join(['usage:python3', argv[0], 'output_dir', 'name']))
        # Non-zero status so callers can detect the misuse.
        sys.exit(1)
    output_dir = argv[1]
    name = argv[2]

    # (sub-directory, input suffix, output suffix, alteration column),
    # listed in processing order: snvindel, germline snv/indel, fusion, cnv.
    specs = [
        ('mutation', '.snvindel.pos.txt', '.snvindel.pos.dedup.txt', 'AAChange.refGene'),
        ('mutation', '.snvindel.Germline.pos.txt', '.snvindel.Germline.pos.dedup.txt',
         'AAChange.refGene'),
        ('fusion', '.fusion.pos.txt', '.fusion.pos.dedup.txt', 'FUSION'),
        ('cnvkit', '.cnv.pos.txt', '.cnv.pos.dedup.txt', 'Gene_Symbol'),
    ]
    jobs = [
        (
            '/'.join([output_dir, subdir, name + in_suffix]),
            '/'.join([output_dir, subdir, name + out_suffix]),
            key_column,
        )
        for subdir, in_suffix, out_suffix, key_column in specs
    ]

    # Every output is created empty up front, so an output file exists even
    # when its input is empty.
    for _, output_path, _ in jobs:
        _create_empty(output_path)

    for input_path, output_path, key_column in jobs:
        _process_file(input_path, output_path, key_column)


if __name__ == "__main__":
    main()