865 lines
38 KiB
Python
865 lines
38 KiB
Python
#!/usr/bin/python3
|
|
# -*- coding: UTF-8 -*-
|
|
|
|
##https://www.pianshen.com/article/5314917437/
|
|
##https://zhuanlan.zhihu.com/p/366902690
|
|
##https://itpcb.com/a/277599
|
|
import docxtpl
|
|
from docx.shared import Mm
|
|
from docxtpl import DocxTemplate,RichText
|
|
import pandas as pd
|
|
from pandas import DataFrame
|
|
import re
|
|
import sys
|
|
import os
|
|
import json
|
|
import time
|
|
|
|
# The script takes exactly four positional arguments:
#   output_dir, sample name, sample type ('t' tissue / 'c' cfDNA), project.
if len(sys.argv) != 5:
    # sys.exit(<str>) writes the message to stderr and exits with status 1,
    # so a calling shell or pipeline can detect the usage error
    # (a bare sys.exit() would report success).
    sys.exit(" ".join(['usage:python', sys.argv[0], 'output_dir', 'tumor',
                       'sample_type(t for tissue,c for cfdna)', 'project']))
|
|
|
|
|
|
# Command-line arguments (count already validated above).
output_dir = sys.argv[1]
name = sys.argv[2]
Sample_type = sys.argv[3]
projcet = sys.argv[4]

# Locations of the per-sample result files, all relative to output_dir.
snv_base = name + '.snvindel.pos.dedup.txt'
snv_file = output_dir + '/mutation/' + snv_base

snv_base_vus = name + '.snvindel.vus.txt'
snv_file_vus = output_dir + '/mutation/' + snv_base_vus

fusion_base = name + '.fusion.pos.dedup.txt'
fusion_file = output_dir + '/fusion/' + fusion_base

cnv_base = name + '.cnv.pos.dedup.txt'
cnv_file = output_dir + '/cnvkit/' + cnv_base

qc_base = name + '_qc.txt'
qc_file = output_dir + '/qc/' + qc_base

# Rendered report destination.
report_base = name + '_report.docx'
report_file = output_dir + '/report/' + report_base

indication_file = output_dir + '/mutation/' + 'indication.txt'
|
|
|
|
# Template rendering context; the sections below fill these containers in.
context = dict(
    list1=[],
    list2={},
    list3={},
    clingene1=[],
    clingene2=[],
    nonclingenes=[],
    genefunc={},
    indication=[],
    mmr=[],
    chemo=[],
)
|
|
|
|
|
|
#genefunction
# Maps an upper-cased gene symbol (column 1) to its description text
# (column 2) from the tab-separated gene function table.
genefunction = {}
with open("/dataseq/jmdna/codes/reportbase/gene_function.txt", 'r', encoding='utf-8') as gf_fh:
    next(gf_fh, None)  # first line is skipped (treated as a header)
    for line in gf_fh:
        fields = line.strip().split("\t")
        # Blank or truncated lines carry no description; skip them instead
        # of failing with IndexError.
        if len(fields) < 2:
            continue
        genefunction[fields[0].upper()] = fields[1]
|
|
|
|
##sensitive_resistant_drug
# Two lookups keyed by upper-cased drug name, read from the target drug
# table: column 9 (index 8) -> drug_disease, column 12 (index 11) ->
# drug_mechanism.  Column 1 may list several drug names separated by '|'.
drug_disease = {}
drug_mechanism = {}
with open("/dataseq/jmdna/codes/reportbase/target_drug.txt", 'r', encoding='utf-8') as drug_fh:
    next(drug_fh, None)  # first line is skipped (treated as a header)
    for line in drug_fh:
        fields = line.split("\t")
        # Lines without the needed columns (e.g. a trailing blank line)
        # are skipped instead of failing with IndexError.
        if len(fields) < 12:
            continue
        disease = fields[8]
        # NOTE(review): the line is not stripped, so if column 12 is the last
        # column this value keeps its trailing newline and the emptiness
        # check below is always true -- confirm against the table layout.
        mechanism = fields[11]
        if disease or mechanism:
            for drug in fields[0].split('|'):
                drug_disease[drug.upper()] = disease
                drug_mechanism[drug.upper()] = mechanism

# Report table of drugs with a known mechanism, grouped by predicted response;
# index 0 collects sensitive drugs, index 1 resistant drugs.
sensitive_resistant_drug = [{'type': '可能敏感药物', 'drug': []}, {'type': '可能耐药药物', 'drug': []}]
|
|
|
|
##somatic snv/indel, VUS, fusion and CNV processing
# Each section reads one per-sample result table and fills the shared
# `context` containers:
#   list1      one summary row per variant (gene, p, freq, drug_A..drug_D)
#   list2      variant key -> list of per-drug evidence rows
#   list3      gene -> newline-joined variant summaries
#   genefunc   variant key -> gene description text
#   clingene1 / clingene2 / nonclingenes   variant detail rows
# and the module-level `sensitive_resistant_drug` table.
#
# NOTE(review): the source this was rebuilt from had lost its indentation;
# nesting was reconstructed from data flow.  The spots where more than one
# reading was possible are marked below.

# (prefix of ExonicFunc.refGene, label shown in the report)
_MUTTYPE_LABELS = (
    ("nonsynonymous SNV", '错义突变'),
    ("frameshift", '移码突变'),
    ("nonframeshift", '非移码突变'),
    ("stopgain", '提前终止'),
)


def _translate_muttype(exonic_func):
    """Return the report label for an ExonicFunc.refGene value, '/' if unrecognised."""
    text = str(exonic_func)
    for prefix, label in _MUTTYPE_LABELS:
        if text.startswith(prefix):
            return label
    return '/'


def _split_aachange(aachange):
    """Split 'gene:transcript:exon:nacid[:aacid]' into a dict; aacid is '/' when absent.

    Raises ValueError when the value has neither four nor five ':' fields.
    """
    parts = aachange.split(":")
    if len(parts) == 5:
        gene, transcript, exon, nacid, aacid = parts
    else:
        gene, transcript, exon, nacid = parts
        aacid = '/'
    return {'gene': gene, 'transcript': transcript, 'exon': exon,
            'nacid': nacid, 'aacid': aacid}


def _load_trusted(path):
    """Read a tab-separated result table, dropping rows whose '可信' column is 0.

    Returns None when the file is empty (zero bytes).
    """
    if os.path.getsize(path) == 0:
        return None
    table = pd.read_table(path, sep="\t")
    table.drop(table.index[table['可信'] == 0], inplace=True)
    return table


def _record_drug_mechanisms(row):
    """Add the row's drugs that exist in the drug table to sensitive_resistant_drug."""
    english = row['Drug'].replace(" + ", ",").split(",")
    chinese = row['药物中文名'].replace(" + ", ",").split(",")
    # NOTE(review): the Chinese name is taken at the same position as the
    # English one (assumes the counter advanced once per drug); a shorter
    # Chinese list raises IndexError.
    for position, drug in enumerate(english):
        key = drug.upper()
        if key not in drug_disease:
            continue
        if re.search(r'敏感', row['Response_Type_C']):
            bucket = sensitive_resistant_drug[0]['drug']
        elif re.search(r'耐药', row['Response_Type_C']):
            bucket = sensitive_resistant_drug[1]['drug']
        else:
            continue
        entry = {
            'name': chinese[position],
            'mechanism': "\n".join([drug_disease[key], drug_mechanism[key]]).strip(),
        }
        if entry not in bucket:
            bucket.append(entry)


def _collect_evidence(rows):
    """Return the per-drug evidence rows for one variant and record drug mechanisms."""
    entries = []
    for _, row in rows.iterrows():
        entries.append({
            'drug': row['药物中文名'],
            'effect': row['Response_Type_C'],
            'tumor': row['疾病中文名'],
            'evidence': row['Evidence_Source_C'],
            'sig': row['EfficacyEvidence'],
        })
        _record_drug_mechanisms(row)
    return entries


def _unique(items):
    """Return items without duplicates, keeping first-occurrence order."""
    seen = set()
    ordered = []
    for item in items:
        if item not in seen:
            seen.add(item)
            ordered.append(item)
    return ordered


def _drug_tiers(table, is_variant):
    """Build the drug_A..drug_D cells for the rows selected by is_variant.

    drug_A: sensitive, label '适应症'   -> level 'A'
    drug_B: sensitive, label '非适应症' -> level 'C'
    drug_C: sensitive, label '.'        -> level from '证据等级'
    drug_D: resistant                   -> 'C' for '非适应症', else '证据等级'
    Each cell is a newline-joined list of '<drug>【<level> 级】', or '/' if empty.
    """
    label = table['标签']
    sensitive = table['Response_Type_C'].str.contains("敏感")
    resistant = table['Response_Type_C'].str.contains("耐药")
    selections = (
        ('drug_A', is_variant & (label == '适应症') & sensitive,
         lambda row: 'A'),
        ('drug_B', is_variant & (label == '非适应症') & sensitive,
         lambda row: 'C'),
        ('drug_C', is_variant & (label == '.') & sensitive,
         lambda row: row['证据等级']),
        ('drug_D', is_variant & resistant,
         lambda row: 'C' if row['标签'] == '非适应症' else row['证据等级']),
    )
    tiers = {}
    for key, mask, level_of in selections:
        cells = []
        for _, row in table[mask].iterrows():
            level = level_of(row)
            cells.extend(str(drug) + '【' + level + ' 级】'
                         for drug in row['药物中文名'].split(","))
        tiers[key] = "\n".join(_unique(cells)) or '/'
    return tiers


def _gene_function(symbol):
    """Return the description for a gene symbol, or '/' (with a warning) if unknown."""
    key = symbol.upper()
    if key not in genefunction:
        # A gene missing from the function table used to abort the whole
        # report with KeyError; use the report's "no data" marker instead.
        print('warning: no gene function entry for ' + symbol, file=sys.stderr)
        return '/'
    return genefunction[key]


def _file_by_indication(rows, entry):
    """Append entry to clingene1 for each indication/level-B row, else to clingene2.

    Duplicates are removed later, when clingenes1/clingenes2 are built.
    """
    for _, row in rows.iterrows():
        if row['标签'] == '适应症' or row['证据等级'] == 'B':
            context['clingene1'].append(entry)
        else:
            context['clingene2'].append(entry)


## somatic snv/indel
snv = _load_trusted(snv_file)
if snv is not None:
    for gene in snv['Gene.refGene'].drop_duplicates():
        summaries = []
        for mut in snv['AAChange.refGene'][snv['Gene.refGene'] == gene].drop_duplicates():
            is_mut = snv['AAChange.refGene'] == mut
            rows = snv[is_mut]
            first = rows.iloc[0]

            context['list2'][mut] = _collect_evidence(rows)

            # Prefer the protein change; fall back to the coding change and,
            # if neither is present, to the full annotation string (this
            # case used to crash on a None match).
            change = re.search(r'(p\..*)$', mut) or re.search(r'(c\..*)$', mut)
            info = {
                'gene': first['Gene.refGene'],
                'p': change.group(1) if change else mut,
                'freq': first['Freq'],
            }
            info.update(_drug_tiers(snv, is_mut))
            context['list1'].append(info)
            summaries.append("".join([info['p'], '(', info['freq'], ')']))

            ##genefunc
            context['genefunc'][mut] = _gene_function(gene)

            ##clingenes
            entry = _split_aachange(mut)
            entry['freq'] = first['Freq']
            entry['muttype'] = _translate_muttype(first['ExonicFunc.refGene'])
            # NOTE(review): the original loop ended with `break`; it is read
            # here as "classify by the first row only".  The de-duplicated
            # clingenes1/clingenes2 lists are the same under the other reading.
            _file_by_indication(rows.iloc[:1], entry)
        context['list3'][gene] = "\n".join(summaries)


##target vus and nontarget vus
if os.path.getsize(snv_file_vus) > 0:
    snv_vus = pd.read_table(snv_file_vus, sep="\t")
    for _, row in snv_vus.iterrows():
        # Only variants with a frequency of at least 2% are listed.
        if float(row['Freq'].replace('%', '')) >= 2:
            nonclingene = _split_aachange(row['AAChange.refGene'])
            nonclingene['freq'] = row['Freq']
            nonclingene['muttype'] = _translate_muttype(row['ExonicFunc.refGene'])
            context['nonclingenes'].append(nonclingene)


## fusion
fusion = _load_trusted(fusion_file)
if fusion is not None:
    for gene in fusion['Gene_Symbol'].drop_duplicates():
        summaries = []
        for mut in fusion['FUSION'][fusion['Gene_Symbol'] == gene].drop_duplicates():
            is_mut = fusion['FUSION'] == mut
            rows = fusion[is_mut]

            context['list2'][mut] = _collect_evidence(rows)

            info = {
                'gene': mut,
                'p': '融合',
                'freq': "".join([str(rows['FREQ1'].iloc[0]), '%']),
            }
            info.update(_drug_tiers(fusion, is_mut))
            context['list1'].append(info)
            summaries.append("".join([info['gene'], '(', info['freq'], ')']))

            ##genefunc
            context['genefunc'][mut] = _gene_function(gene)

            ##clingenes
            entry = {'gene': info['gene'], 'freq': info['freq'],
                     'transcript': "/", 'exon': "/", 'nacid': "/", 'aacid': "/",
                     'muttype': '融合'}
            _file_by_indication(rows, entry)
        context['list3'][gene] = "\n".join(summaries)


## cnv
cnv = _load_trusted(cnv_file)
if cnv is not None:
    for gene in cnv['gene'].drop_duplicates():
        summaries = []
        for mut in cnv['Gene_Symbol'][cnv['gene'] == gene].drop_duplicates():
            is_mut = cnv['Gene_Symbol'] == mut
            rows = cnv[is_mut]
            copy_number = rows['cn'].iloc[0]

            # More than two copies is reported as amplification, otherwise loss.
            info = {'gene': mut, 'p': '扩增' if copy_number > 2 else '缺失'}
            info.update(_drug_tiers(cnv, is_mut))
            info['freq'] = " ".join([str(copy_number), '拷贝'])
            context['list1'].append(info)

            evidence_rows = _collect_evidence(rows)
            summaries.append("".join([info['p'], '(', info['freq'], ')']))

            # CNV entries are keyed by "<gene> <gain/loss>".
            variant_key = " ".join([mut, info['p']])
            context['list2'][variant_key] = evidence_rows

            ##genefunc
            context['genefunc'][variant_key] = _gene_function(mut)

            ##clingenes
            entry = {'gene': info['gene'], 'freq': info['freq'],
                     'transcript': "/", 'exon': "/", 'nacid': "/", 'aacid': "/",
                     'muttype': info['p']}
            _file_by_indication(rows, entry)
        context['list3'][gene] = "\n".join(summaries)
|
|
|
|
|
|
##msi
# Microsatellite instability is only reported for tissue samples ('t').
if Sample_type == 't':
    msi_file = ''.join([output_dir, '/MSI/', name, '.msi'])
    # The second line of the file holds the values; column 1 is stored as
    # msi_count and column 3 is read as a percentage.
    with open(msi_file, 'r', encoding='utf-8') as msi_fh:
        msi = msi_fh.readlines()[1].split("\t")
    context['msi_count'] = msi[0]
    context['msi_value'] = round(float(msi[2].strip()) / 100, 2)
    # NOTE(review): the 0.3 cut-off is applied to the value *after* rounding
    # to two decimals, so e.g. 29.6% is classified MSI-H -- confirm intended.
    if context['msi_value'] >= 0.3:
        context['msi_result'] = 'MSI-H'
        context['msi_predict'] = '对免疫检查点抑制剂可能敏感'
    else:
        context['msi_result'] = 'MSS'
        context['msi_predict'] = '对免疫检查点抑制剂可能不敏感'
|
|
|
|
|
|
##MMR processing
mmr_file = ''.join([output_dir, '/MMR/', name, "_mmr.txt"])
mmr_size = os.path.getsize(mmr_file)
mmr_result = 0            # number of variants not classified '意义未明突变'
mmr_result_summary = []   # "<gene> <aacid>" for each such variant
if mmr_size > 0:
    with open(mmr_file, 'r', encoding='utf-8') as mmr_fh:
        mmr_lines = mmr_fh.readlines()[1:]  # first line is skipped
    for line in mmr_lines:
        fields = line.strip().split("\t")
        mmr = {
            'gene': fields[0],
            'transcript': fields[1],
            'nacid': fields[2],
            'aacid': fields[3],
            'muttype': fields[5],
            'freq': fields[4],
            'sig': fields[6],
        }
        # NOTE(review): both the counter and the summary entry are taken to
        # be conditional on the significance (indentation was reconstructed).
        if mmr['sig'] != '意义未明突变':
            mmr_result += 1
            mmr_result_summary.append(mmr['gene'] + ' ' + mmr['aacid'])
        context['mmr'].append(mmr)

context['mmr_result'] = mmr_result
mmr_result_summary = ' | '.join(mmr_result_summary)
if mmr_result_summary:
    # Fixed: this used to reference the misspelled name `mrr_result_summary`,
    # raising NameError whenever a reportable MMR variant was present.
    context['mmr_result_summary'] = mmr_result_summary
    context['mmr_predict'] = "对免疫检查点抑制剂可能敏感"
else:
    context['mmr_result_summary'] = "未检测到相关基因突变"
    context['mmr_predict'] = "对免疫检查点抑制剂可能不敏感"
|
|
|
|
|
|
|
|
##chemo
chemo_file = ''.join([output_dir, '/chemo/', name, ".drug.res.txt"])
# Read as UTF-8 like the other text inputs: column 4 is compared against
# Chinese literals below.
with open(chemo_file, 'r', encoding='utf-8') as chemo_fh:
    chemos = chemo_fh.readlines()[1:]  # first line is skipped

chemo_result = 0      # number of drugs marked '推荐'
recommend_drug = []
normal_drug = []
restrict_drug = []
drugs_by_advice = {'推荐': recommend_drug, '常规': normal_drug, '谨慎': restrict_drug}

# The report table shows two drugs per row: slot 1 (left) and slot 2 (right).
# Keys are bool<slot> (1-based drug number), name<slot> and result<slot>.
for start in range(0, len(chemos), 2):
    chemo = {}
    for slot, position in enumerate(range(start, min(start + 2, len(chemos))), 1):
        fields = chemos[position].strip().split("\t")
        suffix = str(slot)
        chemo['bool' + suffix] = position + 1
        chemo['name' + suffix] = fields[0]
        chemo['result' + suffix] = fields[4]
        advice = fields[3]
        if advice == '推荐':
            chemo_result += 1
        if advice in drugs_by_advice:
            drugs_by_advice[advice].append(fields[0])
    context['chemo'].append(chemo)

context['chemo_result'] = chemo_result
context['recommend_drug'] = ','.join(recommend_drug)
context['normal_drug'] = ','.join(normal_drug)
context['restrict_drug'] = ','.join(restrict_drug)

# Per-drug detail: one entry per distinct drug with all of its tested sites.
chemo_detail_file = ''.join([output_dir, '/chemo/', name, ".drug.infos.txt"])
chemo_data = pd.read_table(chemo_detail_file, sep="\t")
chemo_detail = []
for drug in chemo_data['药物'].drop_duplicates():
    sites = []
    for _, row in chemo_data[chemo_data['药物'] == drug].iterrows():
        sites.append({
            'gene': row['检测基因'],
            'site': row['检测位点'],
            'gt': row['基因型'],
            'level': row['证据等级'],
            'sig': row['用药提示'],
        })
    chemo_detail.append({'drug': drug, 'info': sites})

context['chemo_detail'] = chemo_detail
context['sensitive_resistant_drug'] = sensitive_resistant_drug
|
|
|
|
##chemotherapy combination regimens
# Groups the regimens in the combination table by cancer type ('癌种').
# Local names no longer rebind the builtins `type` and `bool`.
chemo_comb_file = ''.join([output_dir, '/chemo/', name, ".chemo.comb.txt"])
chemo_comb_table = pd.read_table(chemo_comb_file, sep="\t")

chemo_comb = []
for cancer in chemo_comb_table['癌种'].drop_duplicates():
    regimens = []
    for _, row in chemo_comb_table[chemo_comb_table['癌种'] == cancer].iterrows():
        regimens.append({
            'name': row['用药方案'],
            'abbr': row['方案缩写'],
            'sig': row['临床提示'],
        })
    chemo_comb.append({'type': cancer, 'drug': regimens})
context['chemo_comb'] = chemo_comb
|
|
|
|
##hereditary cancer
# The hereditary section is only built when the '.hereditary.pre.txt' marker
# file exists; the data itself is read from '.hereditary.txt' and '.risk.txt'.
if os.path.exists(''.join([output_dir, '/hereditary/', name, '.hereditary.pre.txt'])):
    context['hereditary_cancer_1'] = []
    context['hereditary_cancer_2'] = []
    hereditary_file1 = ''.join([output_dir, '/hereditary/', name, ".hereditary.txt"])
    hereditary_file2 = ''.join([output_dir, '/hereditary/', name, ".risk.txt"])

    hereditary_result = 0
    hereditary_result_summary = []
    hereditary_disease = []
    if os.path.getsize(hereditary_file1) > 0:
        with open(hereditary_file1, 'r', encoding='utf-8') as hereditary_file1_fh:
            variant_lines = hereditary_file1_fh.readlines()[1:]  # first line is skipped
        for line in variant_lines:
            lines = line.strip().split("\t")
            hereditary_cancer_1 = {
                'gene': lines[0],
                'syndrome': lines[1],
                'hereditary_type': lines[2],
                'type': lines[3],
                'result': lines[4],
            }
            # Column 5 may hold several ';'-separated results; each one counts.
            hereditary_result += len(lines[4].split(";"))
            hereditary_result_summary.append(
                hereditary_cancer_1['gene'] + ' ' + hereditary_cancer_1['result'])
            hereditary_disease.append(hereditary_cancer_1['syndrome'])
            context['hereditary_cancer_1'].append(hereditary_cancer_1)

    context['hereditary_result'] = hereditary_result
    if hereditary_result_summary:
        context['hereditary_disease'] = ';'.join(hereditary_disease)
        context['hereditary_result_summary'] = ' | '.join(hereditary_result_summary)
    else:
        context['hereditary_disease'] = '/'
        context['hereditary_result_summary'] = '未检测到相关基因突变'

    # Risk table: two cancer types per report row (type1/risk1, type2/risk2).
    hereditary_risk = []
    with open(hereditary_file2, 'r', encoding='utf-8') as hereditary_file2_fh:
        heres = hereditary_file2_fh.readlines()[1:]  # first line is skipped
    for start in range(0, len(heres), 2):
        hereditary_cancer_2 = {}
        for slot, line in enumerate(heres[start:start + 2], 1):
            lines = line.strip().split("\t")
            suffix = str(slot)
            hereditary_cancer_2['type' + suffix] = lines[0]
            if lines[1] == '偏高':
                # Fixed: elevated risk in the second column used to be shown
                # in the table but left out of the hereditary_risk summary.
                hereditary_risk.append(lines[0])
                hereditary_cancer_2['risk' + suffix] = RichText('偏高', color='FF0000')
            elif lines[1] == '同一般人群':
                hereditary_cancer_2['risk' + suffix] = RichText('同一般人群')
        context['hereditary_cancer_2'].append(hereditary_cancer_2)

    if hereditary_risk:
        context['hereditary_risk'] = ','.join(hereditary_risk) + '风险可能较高'
    else:
        context['hereditary_risk'] = '风险同一般人群'

    # Placeholder row when no hereditary variant was listed.
    if len(context['hereditary_cancer_1']) == 0:
        context['hereditary_cancer_1'] = [{'gene': '/', 'syndrome': '/', 'hereditary_type': '/', 'type': '/', 'result': '/'}]
|
|
|
|
##potentially beneficial drugs
# Count the distinct entries across the drug_A, drug_B and drug_C cells of
# every variant row ('/' marks an empty cell; drug_D is not counted).
total_drug_count = []
for variant in context['list1']:
    for tier in ('drug_A', 'drug_B', 'drug_C'):
        cell = variant[tier]
        if cell == '/':
            continue
        for label in cell.split("\n"):
            if label in total_drug_count:
                continue
            total_drug_count.append(label)

context['total_drug_count'] = len(total_drug_count)
|
|
|
|
##de-duplication
# clingenes1: distinct entries of clingene1, in first-seen order.
context['clingenes1'] = []
for entry in context['clingene1']:
    if entry in context['clingenes1']:
        continue
    context['clingenes1'].append(entry)

# clingenes2: distinct entries of clingene2 that are not already in clingenes1.
context['clingenes2'] = []
for entry in context['clingene2']:
    if entry in context['clingenes2'] or entry in context['clingenes1']:
        continue
    context['clingenes2'].append(entry)

##number of detected gene variants
context['total_mut_count'] = len(context['clingenes1']) + len(context['clingenes2'])
|
|
|
|
|
|
##FDA/NMPA/NCCN-approved gene tests for this cancer type
# One row per gene in the indication table: column 1 is the gene, column 2
# the descriptive content.  The result cell shows the variants recorded for
# that gene in context['list3'] (in red), or a "not detected" text.
with open(indication_file, 'r', encoding='utf-8') as indication_fh:
    next(indication_fh, None)  # first line is skipped
    for line in indication_fh:
        fields = line.strip().split("\t")
        # Blank or truncated lines are skipped instead of raising IndexError.
        if len(fields) < 2:
            continue
        indication = {'gene': fields[0], 'content': fields[1]}
        if indication['gene'] in context['list3']:
            indication['result'] = RichText(context['list3'][indication['gene']], color='FF0000')
        else:
            indication['result'] = '未检出变异'
        context['indication'].append(indication)
|
|
|
|
##qc processing
# The QC file is a two-column, header-less table: metric name -> value.
qc_file = ''.join([output_dir, '/qc/', name, '_qc.txt'])
qc = pd.read_table(qc_file, sep="\t", header=None, index_col=0, names=['A', 'B'])

# Minimum mean de-duplicated depth per sample type ('c' cfDNA, 't' tissue).
depth_minimum = {'c': 1000, 't': 500}
if Sample_type not in depth_minimum:
    # Previously an unknown sample type left depth_result undefined and the
    # script died later with NameError; fail here with a clear message.
    sys.exit("unknown sample_type '" + Sample_type + "' (expected t or c)")

Q30 = qc.loc['Q30(%)', 'B']
Q30_result = '合格' if Q30 >= 85 else '警戒'

depth = qc.loc['mean_depth(dedup)', 'B']
depth_result = '合格' if depth >= depth_minimum[Sample_type] else '警戒'

uniformity = qc.loc['coverage(>=0.2*meanx)', 'B']
uniformity_result = '合格' if uniformity >= 90 else '警戒'

# Overall QC passes only when all three metrics pass.
if Q30_result == '合格' and depth_result == '合格' and uniformity_result == '合格':
    context['qc_result'] = '合格'
else:
    context['qc_result'] = '警戒'

context['Q30'] = Q30
context['Q30_result'] = Q30_result
context['depth'] = depth
context['uniformity'] = uniformity
context['depth_result'] = depth_result
context['uniformity_result'] = uniformity_result
|
|
|
|
|
|
|
|
##negative-result handling
# Any table that ended up empty gets a single placeholder row of '/' cells.

##list1
if not context['list1']:
    context['list1'] = [{'gene': '/', 'freq': '/', 'drug_A': '/', 'drug_B': '/', 'drug_C': '/', 'drug_D': '/'}]

##list2
if not context['list2']:
    context['list2'] = {'/': [{'drug': '/', 'effect': '/', 'tumor': '/', 'evidence': '/', 'sig': '/'}]}
    context['genefunc']['/'] = '/'

##clingenes,nonclingenes
for table_key in ('clingenes1', 'clingenes2', 'nonclingenes'):
    if not context[table_key]:
        context[table_key] = [{'gene': '/', 'transcript': '/', 'nacid': '/', 'aacid': '/', 'exon': '/', 'muttype': '/', 'freq': '/'}]

##sensitive_resistant_drug
for group_index in (0, 1):
    drug_group = context['sensitive_resistant_drug'][group_index]
    if not drug_group['drug']:
        drug_group['drug'] = [{'name': '/', 'mechanism': '/'}]

##mmr
if not context['mmr']:
    context['mmr'] = [{'gene': '/', 'transcript': '/', 'nacid': '/', 'aacid': '/', 'muttype': '/', 'freq': '/', 'sig': '/'}]
|
|
|
|
##info
# Patient/sample details for the report header.  Defaults are used when the
# optional '<name>_post.json' file is absent; otherwise the first record of
# its "data" list overrides them.
post_file = ''.join([output_dir, '/qc/', name, '_post.json'])
report_date = time.strftime("%Y-%m-%d", time.localtime())

patient_info = {
    'name': name,
    'sex': '/',
    'age': '/',
    'phone': '/',
    'medical_history': '/',
    'family_history': '/',
    'sample_id': name,
    'sample_type': '/',
    'report_date': report_date,
    'arrival_date': report_date,
    'cancer_type': '/',
    'pathologic_diagnosis': '/',
}

if os.path.isfile(post_file):
    # Read explicitly as UTF-8 (matching the other text inputs) and close
    # the handle once parsed.
    with open(post_file, 'r', encoding='utf-8') as post_fh:
        post = json.load(post_fh)
    record = post["data"][0]
    # The patient name goes only into the report fields; the module-level
    # `name` (used to build file paths) is left untouched.
    patient_info.update({
        'name': record["name"],
        'sex': record["gender"],
        'age': record["age"],
        'medical_history': record["treatHistory"],
        'family_history': record["sickFamilyHistory"],
        'sample_id': record["barcode"],
        'sample_type': record["source"],
        # Keep only the date part of "<date> <time>".
        'arrival_date': record["receiveTime"].split(' ')[0],
        'cancer_type': record["zlType"],
        'pathologic_diagnosis': record["treatResult"],
    })

context['info'] = patient_info
context['report_time'] = report_date
|
|
|
|
##template rendering
# Templates live next to this script; pick one by project and sample type.
file_real = os.path.realpath(sys.argv[0])
Exe_Path = os.path.dirname(file_real)

report_template = {'lung85gene': {'t': 'lung85-tissue-oem.docx', 'c': 'lung85-blood-oem.docx'},
                   'crc88gene': {'t': 'CRC88-tissue-oem.docx', 'c': 'CRC88-blood-oem.docx'}}

# Report an unsupported project / sample type clearly instead of a bare KeyError.
if projcet not in report_template:
    sys.exit("unknown project '" + projcet + "' (expected one of: "
             + ", ".join(sorted(report_template)) + ")")
if Sample_type not in report_template[projcet]:
    sys.exit("unknown sample_type '" + Sample_type + "' (expected t or c)")

doc_full = os.path.join(Exe_Path, report_template[projcet][Sample_type])

doc = DocxTemplate(doc_full)
doc.render(context)

# Make sure the destination directory exists before saving.
report_dir = os.path.dirname(report_file)
if report_dir and not os.path.isdir(report_dir):
    os.makedirs(report_dir)
doc.save(report_file)
|