report/scripts/comcancergene_report.py

865 lines
38 KiB
Python

#!/usr/bin/python3
# -*- coding: UTF-8 -*-
##https://www.pianshen.com/article/5314917437/
##https://zhuanlan.zhihu.com/p/366902690
##https://itpcb.com/a/277599
import docxtpl
from docx.shared import Mm
from docxtpl import DocxTemplate,RichText
import pandas as pd
from pandas import DataFrame
import re
import sys
import os
import json
import time
# Command-line contract: exactly four positional arguments
# (output_dir, sample name, sample type, project).
if len(sys.argv) != 5:
    # Passing a string to sys.exit() writes it to stderr and exits with
    # status 1, so a calling pipeline can detect the usage error.
    sys.exit(" ".join(['usage:python', sys.argv[0], 'output_dir', 'tumor',
                       'sample_type(t for tissue,c for cfdna)', 'project']))
output_dir = sys.argv[1]
name = sys.argv[2]
Sample_type = sys.argv[3]
# (sic) The misspelled variable name is kept because later sections read it.
projcet = sys.argv[4]

# Input/output locations, all derived from output_dir and the sample name.
snv_base = name + '.snvindel.pos.dedup.txt'
snv_file = '/'.join([output_dir, 'mutation', snv_base])
snv_base_vus = name + '.snvindel.vus.txt'
snv_file_vus = '/'.join([output_dir, 'mutation', snv_base_vus])
fusion_base = name + '.fusion.pos.dedup.txt'
fusion_file = '/'.join([output_dir, 'fusion', fusion_base])
cnv_base = name + '.cnv.pos.dedup.txt'
cnv_file = '/'.join([output_dir, 'cnvkit', cnv_base])
qc_base = name + '_qc.txt'
qc_file = '/'.join([output_dir, 'qc', qc_base])
report_base = name + '_report.docx'
report_file = '/'.join([output_dir, 'report', report_base])
indication_file = '/'.join([output_dir, 'mutation', 'indication.txt'])

# Template context, filled in section by section below and finally handed
# to DocxTemplate.render().
context = {
    'list1': [],
    'list2': {},
    'list3': {},
    'clingene1': [],
    'clingene2': [],
    'nonclingenes': [],
    'genefunc': {},
    'indication': [],
    'mmr': [],
    'chemo': [],
}
# Gene function table: maps the upper-cased first column of
# gene_function.txt to its second column. The first line is a header.
genefunction = {}
with open("/dataseq/jmdna/codes/reportbase/gene_function.txt", 'r', encoding='utf-8') as gf:
    next(gf, None)  # skip the header line
    for line in gf:
        if not line.strip():
            # A blank (e.g. trailing) line carries no record; previously it
            # aborted the run with an IndexError.
            continue
        fields = line.strip().split("\t")
        gene = fields[0]
        func = fields[1]
        genefunction[gene.upper()] = func
## sensitive_resistant_drug
# Two lookups keyed by upper-cased drug name, read from target_drug.txt:
# column 0 holds one or more '|'-separated drug names, column 8 is stored as
# the "disease" text and column 11 as the "mechanism" text.  The first line
# of the file is a header.  Lines are split without stripping, so the values
# are stored exactly as they appear in the file.
drug_disease = {}
drug_mechanism = {}
with open("/dataseq/jmdna/codes/reportbase/target_drug.txt", 'r', encoding='utf-8') as drug_fh:
    next(drug_fh, None)  # skip the header line
    for line in drug_fh:
        columns = line.split("\t")
        disease = columns[8]
        mechanism = columns[11]
        drugs = columns[0].split('|')
        # Rows with neither text filled in contribute nothing.
        if disease or mechanism:
            for drug in drugs:
                drug_disease[drug.upper()] = disease
                drug_mechanism[drug.upper()] = mechanism
# Accumulator for the report: index 0 collects possibly-sensitive drugs,
# index 1 possibly-resistant drugs.
sensitive_resistant_drug = [{'type': '可能敏感药物', 'drug': []}, {'type': '可能耐药药物', 'drug': []}]
## Somatic SNV/indel processing.
# Fills context['list1'], ['list2'], ['list3'], ['genefunc'], ['clingene1'],
# ['clingene2'] and the sensitive_resistant_drug accumulator from snv_file.
# NOTE(review): this copy of the file has lost its indentation, so the
# nesting described in the comments below is inferred from statement order
# and must be confirmed against the original before the section is run.
snv_size = os.path.getsize(snv_file)
if snv_size>0:
snv=pd.read_table(snv_file,sep="\t")
# Drop every row whose '可信' column equals 0.
cols=[index for index,row in snv[snv['可信']==0].iterrows()]
snv.drop(cols,inplace=True)
genes=snv['Gene.refGene'].drop_duplicates()
if len(genes):
for gene in genes:
# rt[gene] collects one "label(freq)" string per mutation of this gene.
rt={}
rt[gene]=[]
muts=snv['AAChange.refGene'][snv['Gene.refGene']==gene].drop_duplicates()
for mut in muts:
# info2[mut]: one dict per annotation row of this mutation.
info2={mut:[]}
for index,row in snv[snv['AAChange.refGene']==mut].iterrows():
info3={}
info3['drug']=row['药物中文名']
info3['effect']=row['Response_Type_C']
info3['tumor']=row['疾病中文名']
info3['evidence']=row['Evidence_Source_C']
info3['sig']=row['EfficacyEvidence']
info2[mut].append(info3)
##sensitive_resistant_drug
# " + " is treated like "," when splitting the drug lists; `bool` is used
# as the position into drugs_chinese that matches the current `drug`.
drugs=row['Drug'].replace(" + ",",")
drugs=drugs.split(",")
drugs_chinese=row['药物中文名'].replace(" + ",",")
drugs_chinese=drugs_chinese.split(",")
bool=0
for drug in drugs:
# Only drugs present in the target_drug lookup are reported.
if drug.upper() in drug_disease.keys():
if re.search(r'敏感',row['Response_Type_C']):
sensitive_drug={}
sensitive_drug['name']=drugs_chinese[bool]
sensitive_drug['mechanism']=("\n".join([drug_disease[drug.upper()],drug_mechanism[drug.upper()]])).strip()
if sensitive_drug not in sensitive_resistant_drug[0]['drug']:
sensitive_resistant_drug[0]['drug'].append(sensitive_drug)
elif re.search(r'耐药',row['Response_Type_C']):
resistant_drug={}
resistant_drug['name']=drugs_chinese[bool]
resistant_drug['mechanism']=("\n".join([drug_disease[drug.upper()],drug_mechanism[drug.upper()]])).strip()
if resistant_drug not in sensitive_resistant_drug[1]['drug']:
sensitive_resistant_drug[1]['drug'].append(resistant_drug)
# NOTE(review): presumably executed once per drug (for-loop level) so the
# index stays aligned with `drugs` - confirm.
bool+=1
context['list2'][mut]=info2[mut]
info={}
info['gene']=snv['Gene.refGene'][snv['AAChange.refGene']==mut].reset_index(drop=True)[0]
# Display label: the trailing "p.…" part of the AAChange string, otherwise
# the trailing "c.…" part.
m=re.search(r'(p\..*)$',mut)
if m:
info['p']=m.group(1)
else:
m=re.search(r'(c\..*)$',mut)
# NOTE(review): if neither pattern matches, m is None here and
# m.group(1) raises AttributeError.
info['p']=m.group(1)
info['freq']=snv['Freq'][snv['AAChange.refGene']==mut].reset_index(drop=True)[0]
# Four drug lists for this mutation:
#   A: label '适应症'   + response contains "敏感", suffix grade 'A'
#   B: label '非适应症' + response contains "敏感", suffix grade 'C'
#   C: label '.'        + response contains "敏感", suffix from '证据等级'
#   D: response contains "耐药"; grade 'C' for label '非适应症',
#      otherwise taken from '证据等级'
# NOTE(review): the '' between drug name and grade yields text such as
# "<drug>A 级】" with no opening bracket - confirm this is intended.
A=[]
B=[]
C=[]
D=[]
for index,row in snv[(snv['AAChange.refGene']==mut) & (snv['标签']=='适应症') & snv['Response_Type_C'].str.contains("敏感")].iterrows():
ds=row['药物中文名'].split(",")
evidence='A'
ds_new=[str(x) + '' + evidence + ' 级】'for x in ds]
A.extend(ds_new)
for index,row in snv[(snv['AAChange.refGene']==mut) & (snv['标签']=='非适应症') & snv['Response_Type_C'].str.contains("敏感")].iterrows():
ds=row['药物中文名'].split(",")
evidence='C'
ds_new=[str(x) + '' + evidence + ' 级】'for x in ds]
B.extend(ds_new)
for index,row in snv[(snv['AAChange.refGene']==mut) & (snv['标签']=='.') & snv['Response_Type_C'].str.contains("敏感")].iterrows():
ds=row['药物中文名'].split(",")
evidence=row['证据等级']
ds_new=[str(x) + '' + evidence + ' 级】'for x in ds]
C.extend(ds_new)
for index,row in snv[(snv['AAChange.refGene']==mut) & snv['Response_Type_C'].str.contains("耐药")].iterrows():
evidence=''
if row['标签']=='非适应症':
evidence='C'
else:
evidence=row['证据等级']
ds=row['药物中文名'].split(",")
ds_new=[str(x) + '' + evidence + ' 级】'for x in ds]
D.extend(ds_new)
# De-duplicate while keeping first-occurrence order.
A=sorted(set(A),key=A.index)
B=sorted(set(B),key=B.index)
C=sorted(set(C),key=C.index)
D=sorted(set(D),key=D.index)
info['drug_A']="\n".join(A)
info['drug_B']="\n".join(B)
info['drug_C']="\n".join(C)
info['drug_D']="\n".join(D)
# Empty lists are shown as '/'.
if not info['drug_A']:
info['drug_A']='/'
if not info['drug_B']:
info['drug_B']='/'
if not info['drug_C']:
info['drug_C']='/'
if not info['drug_D']:
info['drug_D']='/'
context['list1'].append(info)
rt[gene].append("".join([info['p'],'(',info['freq'],')']))
##genefunc
# Raises KeyError when the gene is absent from the gene function table.
context['genefunc'][mut]=genefunction[gene.upper()]
##clingenes
# A row labelled '适应症' or with '证据等级' equal to 'B' puts the mutation
# into context['clingene1']; any other row puts it into context['clingene2'].
# The AAChange string is split on ':' into gene/transcript/exon/nacid and,
# when five parts are present, aacid (otherwise aacid is '/').
# 'ExonicFunc.refGene' is mapped to a Chinese label by prefix, default '/'.
clingene1={}
clingene2={}
for index,row in snv[(snv['AAChange.refGene']==mut)].iterrows():
if row['标签']=='适应症' or row['证据等级']=='B':
clingene1['freq']=snv['Freq'][snv['AAChange.refGene']==mut].reset_index(drop=True)[0]
if len((snv['AAChange.refGene'][snv['AAChange.refGene']==mut].reset_index(drop=True)[0]).split(":")) == 5:
(clingene1['gene'],clingene1['transcript'],clingene1['exon'],clingene1['nacid'],clingene1['aacid'])=(snv['AAChange.refGene'][snv['AAChange.refGene']==mut].reset_index(drop=True)[0]).split(":")
else:
(clingene1['gene'],clingene1['transcript'],clingene1['exon'],clingene1['nacid'])=(snv['AAChange.refGene'][snv['AAChange.refGene']==mut].reset_index(drop=True)[0]).split(":")
clingene1['aacid'] = '/'
clingene1['muttype']=snv['ExonicFunc.refGene'][snv['AAChange.refGene']==mut].reset_index(drop=True)[0]
if re.match("nonsynonymous SNV",clingene1['muttype']):
clingene1['muttype']='错义突变'
elif re.search("^frameshift",clingene1['muttype']):
clingene1['muttype']='移码突变'
elif re.search("^nonframeshift",clingene1['muttype']):
clingene1['muttype']='非移码突变'
elif re.match("stopgain",clingene1['muttype']):
clingene1['muttype']='提前终止'
else:
clingene1['muttype']='/'
context['clingene1'].append(clingene1)
else:
clingene2['freq']=snv['Freq'][snv['AAChange.refGene']==mut].reset_index(drop=True)[0]
if len((snv['AAChange.refGene'][snv['AAChange.refGene']==mut].reset_index(drop=True)[0]).split(":")) == 5:
(clingene2['gene'],clingene2['transcript'],clingene2['exon'],clingene2['nacid'],clingene2['aacid'])=(snv['AAChange.refGene'][snv['AAChange.refGene']==mut].reset_index(drop=True)[0]).split(":")
else:
(clingene2['gene'],clingene2['transcript'],clingene2['exon'],clingene2['nacid'])=(snv['AAChange.refGene'][snv['AAChange.refGene']==mut].reset_index(drop=True)[0]).split(":")
clingene2['aacid'] = '/'
clingene2['muttype']=snv['ExonicFunc.refGene'][snv['AAChange.refGene']==mut].reset_index(drop=True)[0]
if re.match("nonsynonymous SNV",clingene2['muttype']):
clingene2['muttype']='错义突变'
elif re.search("^frameshift",clingene2['muttype']):
clingene2['muttype']='移码突变'
elif re.search("^nonframeshift",clingene2['muttype']):
clingene2['muttype']='非移码突变'
elif re.match("stopgain",clingene2['muttype']):
clingene2['muttype']='提前终止'
else:
clingene2['muttype']='/'
context['clingene2'].append(clingene2)
# NOTE(review): the nesting of this break cannot be recovered from this
# copy (inside the else branch vs. at the end of the for body).  The fusion
# and CNV sections have no equivalent break - confirm the intended scope.
break
# Per-gene summary: one "label(freq)" line per mutation.
context['list3'][gene]="\n".join(rt[gene])
# NOTE(review): presumably pairs with `if len(genes):` and resets snv_size
# when no rows are left after filtering - confirm.
else:
snv_size=0
## Target and non-target VUS.
# Every row of the VUS table whose 'Freq' (a percentage string) is at least 2
# becomes one entry of context['nonclingenes'].
# NOTE(review): indentation was lost in this copy of the file; the nesting
# here is reconstructed from statement order.
snv_size_vus = os.path.getsize(snv_file_vus)
if snv_size_vus > 0:
    snv_vus = pd.read_table(snv_file_vus, sep="\t")
    # (matcher, pattern, report label), tried in order; the first hit wins.
    vus_muttype_rules = (
        (re.match, "nonsynonymous SNV", '错义突变'),
        (re.search, "^frameshift", '移码突变'),
        (re.search, "^nonframeshift", '非移码突变'),
        (re.match, "stopgain", '提前终止'),
    )
    for _, vus_row in snv_vus.iterrows():
        if not float(vus_row['Freq'].replace('%', '')) >= 2:
            continue
        # AAChange is "gene:transcript:exon:nacid[:aacid]".
        change_parts = vus_row['AAChange.refGene'].split(":")
        if len(change_parts) == 5:
            vus_gene, vus_tx, vus_exon, vus_nacid, vus_aacid = change_parts
        else:
            # Anything other than four parts raises ValueError here.
            vus_gene, vus_tx, vus_exon, vus_nacid = change_parts
            vus_aacid = '/'
        vus_freq = vus_row['Freq']
        raw_muttype = vus_row['ExonicFunc.refGene']
        muttype_label = '/'
        for matcher, pattern, label_text in vus_muttype_rules:
            if matcher(pattern, raw_muttype):
                muttype_label = label_text
                break
        nonclingene = {
            'gene': vus_gene,
            'transcript': vus_tx,
            'exon': vus_exon,
            'nacid': vus_nacid,
            'aacid': vus_aacid,
            'freq': vus_freq,
            'muttype': muttype_label,
        }
        context['nonclingenes'].append(nonclingene)
## Fusion processing.
# Fills context['list1'], ['list2'], ['list3'], ['genefunc'], ['clingene1'],
# ['clingene2'] and the sensitive_resistant_drug accumulator from fusion_file.
# NOTE(review): indentation was lost in this copy of the file; the nesting
# below is reconstructed from statement order - confirm against the original.
fusion_size = os.path.getsize(fusion_file)
if fusion_size > 0:
    fusion = pd.read_table(fusion_file, sep="\t")
    # Remove every row whose '可信' column equals 0.
    cols = list(fusion[fusion['可信'] == 0].index)
    fusion.drop(cols, inplace=True)
    genes = fusion['Gene_Symbol'].drop_duplicates()
    if len(genes):
        for gene in genes:
            gene_lines = []  # one "fusion(freq)" string per fusion of this gene
            fusions = fusion['FUSION'][fusion['Gene_Symbol'] == gene].drop_duplicates()
            for mut in fusions:
                mut_rows = fusion[fusion['FUSION'] == mut]

                # Per-row evidence table plus the sensitive/resistant drug lists.
                evidence_rows = []
                for _, row in mut_rows.iterrows():
                    evidence_rows.append({
                        'drug': row['药物中文名'],
                        'effect': row['Response_Type_C'],
                        'tumor': row['疾病中文名'],
                        'evidence': row['Evidence_Source_C'],
                        'sig': row['EfficacyEvidence'],
                    })
                    # " + " is treated like "," when splitting; the Chinese
                    # name is picked by the position of the English name.
                    english_names = row['Drug'].replace(" + ", ",").split(",")
                    chinese_names = row['药物中文名'].replace(" + ", ",").split(",")
                    for position, drug in enumerate(english_names):
                        drug_key = drug.upper()
                        if drug_key not in drug_disease.keys():
                            continue
                        if re.search(r'敏感', row['Response_Type_C']):
                            bucket = sensitive_resistant_drug[0]['drug']
                        elif re.search(r'耐药', row['Response_Type_C']):
                            bucket = sensitive_resistant_drug[1]['drug']
                        else:
                            continue
                        drug_entry = {
                            'name': chinese_names[position],
                            'mechanism': ("\n".join([drug_disease[drug_key], drug_mechanism[drug_key]])).strip(),
                        }
                        if drug_entry not in bucket:
                            bucket.append(drug_entry)
                context['list2'][mut] = evidence_rows

                info = {
                    'gene': mut,
                    'p': '融合',
                    'freq': "".join([str(mut_rows['FREQ1'].iloc[0]), '%']),
                }

                # Drug lists by label/response:
                #   drug_A: label '适应症'   + "敏感", grade 'A'
                #   drug_B: label '非适应症' + "敏感", grade 'C'
                #   drug_C: label '.'        + "敏感", grade from '证据等级'
                #   drug_D: "耐药"; grade 'C' for label '非适应症', else '证据等级'
                # NOTE(review): the '' between drug name and grade leaves the
                # text without an opening bracket - confirm this is intended.
                of_mut = fusion['FUSION'] == mut
                row_label = fusion['标签']
                is_sensitive = fusion['Response_Type_C'].str.contains("敏感")
                tier_masks = (
                    ('drug_A', of_mut & (row_label == '适应症') & is_sensitive),
                    ('drug_B', of_mut & (row_label == '非适应症') & is_sensitive),
                    ('drug_C', of_mut & (row_label == '.') & is_sensitive),
                    ('drug_D', of_mut & fusion['Response_Type_C'].str.contains("耐药")),
                )
                for tier, mask in tier_masks:
                    collected = []
                    for _, row in fusion[mask].iterrows():
                        if tier == 'drug_A':
                            evidence = 'A'
                        elif tier == 'drug_B' or (tier == 'drug_D' and row['标签'] == '非适应症'):
                            evidence = 'C'
                        else:
                            evidence = row['证据等级']
                        for x in row['药物中文名'].split(","):
                            collected.append(str(x) + '' + evidence + ' 级】')
                    # Order-preserving de-duplication; empty lists show as '/'.
                    info[tier] = "\n".join(dict.fromkeys(collected)) or '/'

                context['list1'].append(info)
                gene_lines.append("".join([info['gene'], '(', info['freq'], ')']))
                ##genefunc
                context['genefunc'][mut] = genefunction[gene.upper()]
                ##clingenes
                # A row labelled '适应症' or with '证据等级' == 'B' files the
                # fusion under clingene1, any other row under clingene2.  The
                # same dict object is appended once per row; duplicates are
                # removed by the later de-duplication step.
                tier1_entry = {}
                tier2_entry = {}
                for _, row in mut_rows.iterrows():
                    if row['标签'] == '适应症' or row['证据等级'] == 'B':
                        entry, target = tier1_entry, context['clingene1']
                    else:
                        entry, target = tier2_entry, context['clingene2']
                    entry['gene'] = info['gene']
                    entry['freq'] = info['freq']
                    for field in ('transcript', 'exon', 'nacid', 'aacid'):
                        entry[field] = "/"
                    entry['muttype'] = '融合'
                    target.append(entry)
            context['list3'][gene] = "\n".join(gene_lines)
    else:
        fusion_size = 0
## CNV processing.
# Fills context['list1'], ['list2'], ['list3'], ['genefunc'], ['clingene1'],
# ['clingene2'] and the sensitive_resistant_drug accumulator from cnv_file.
# NOTE(review): indentation was lost in this copy of the file; the nesting
# below is reconstructed from statement order - confirm against the original.
cnv_size = os.path.getsize(cnv_file)
if cnv_size > 0:
    cnv = pd.read_table(cnv_file, sep="\t")
    # Remove every row whose '可信' column equals 0.
    cols = list(cnv[cnv['可信'] == 0].index)
    cnv.drop(cols, inplace=True)
    genes = cnv['gene'].drop_duplicates()
    if len(genes):
        for gene in genes:
            gene_lines = []  # one "type(copies)" string per CNV of this gene
            cnvs = cnv['Gene_Symbol'][cnv['gene'] == gene].drop_duplicates()
            for mut in cnvs:
                mut_rows = cnv[cnv['Gene_Symbol'] == mut]
                copy_number = mut_rows['cn'].iloc[0]
                # More than 2 copies is reported as '扩增', otherwise '缺失'.
                info = {'gene': mut, 'p': '扩增' if copy_number > 2 else '缺失'}

                # Drug lists by label/response:
                #   drug_A: label '适应症'   + "敏感", grade 'A'
                #   drug_B: label '非适应症' + "敏感", grade 'C'
                #   drug_C: label '.'        + "敏感", grade from '证据等级'
                #   drug_D: "耐药"; grade 'C' for label '非适应症', else '证据等级'
                # NOTE(review): the '' between drug name and grade leaves the
                # text without an opening bracket - confirm this is intended.
                of_mut = cnv['Gene_Symbol'] == mut
                row_label = cnv['标签']
                is_sensitive = cnv['Response_Type_C'].str.contains("敏感")
                tier_masks = (
                    ('drug_A', of_mut & (row_label == '适应症') & is_sensitive),
                    ('drug_B', of_mut & (row_label == '非适应症') & is_sensitive),
                    ('drug_C', of_mut & (row_label == '.') & is_sensitive),
                    ('drug_D', of_mut & cnv['Response_Type_C'].str.contains("耐药")),
                )
                for tier, mask in tier_masks:
                    collected = []
                    for _, row in cnv[mask].iterrows():
                        if tier == 'drug_A':
                            evidence = 'A'
                        elif tier == 'drug_B' or (tier == 'drug_D' and row['标签'] == '非适应症'):
                            evidence = 'C'
                        else:
                            evidence = row['证据等级']
                        for x in row['药物中文名'].split(","):
                            collected.append(str(x) + '' + evidence + ' 级】')
                    # Order-preserving de-duplication; empty lists show as '/'.
                    info[tier] = "\n".join(dict.fromkeys(collected)) or '/'
                info['freq'] = " ".join([str(copy_number), '拷贝'])
                context['list1'].append(info)

                # Per-row evidence table plus the sensitive/resistant drug lists.
                evidence_rows = []
                for _, row in mut_rows.iterrows():
                    evidence_rows.append({
                        'drug': row['药物中文名'],
                        'effect': row['Response_Type_C'],
                        'tumor': row['疾病中文名'],
                        'evidence': row['Evidence_Source_C'],
                        'sig': row['EfficacyEvidence'],
                    })
                    # " + " is treated like "," when splitting; the Chinese
                    # name is picked by the position of the English name.
                    english_names = row['Drug'].replace(" + ", ",").split(",")
                    chinese_names = row['药物中文名'].replace(" + ", ",").split(",")
                    for position, drug in enumerate(english_names):
                        drug_key = drug.upper()
                        if drug_key not in drug_disease.keys():
                            continue
                        if re.search(r'敏感', row['Response_Type_C']):
                            bucket = sensitive_resistant_drug[0]['drug']
                        elif re.search(r'耐药', row['Response_Type_C']):
                            bucket = sensitive_resistant_drug[1]['drug']
                        else:
                            continue
                        drug_entry = {
                            'name': chinese_names[position],
                            'mechanism': ("\n".join([drug_disease[drug_key], drug_mechanism[drug_key]])).strip(),
                        }
                        if drug_entry not in bucket:
                            bucket.append(drug_entry)

                gene_lines.append("".join([info['p'], '(', info['freq'], ')']))
                # list2/genefunc are keyed by "<gene symbol> <扩增|缺失>".
                variant_key = " ".join([mut, info['p']])
                context['list2'][variant_key] = evidence_rows
                ##genefunc
                context['genefunc'][variant_key] = genefunction[mut.upper()]
                ##clingenes
                # A row labelled '适应症' or with '证据等级' == 'B' files the
                # CNV under clingene1, any other row under clingene2.  The
                # same dict object is appended once per row; duplicates are
                # removed by the later de-duplication step.
                tier1_entry = {}
                tier2_entry = {}
                for _, row in mut_rows.iterrows():
                    if row['标签'] == '适应症' or row['证据等级'] == 'B':
                        entry, target = tier1_entry, context['clingene1']
                    else:
                        entry, target = tier2_entry, context['clingene2']
                    entry['gene'] = info['gene']
                    entry['freq'] = info['freq']
                    for field in ('transcript', 'exon', 'nacid', 'aacid'):
                        entry[field] = "/"
                    entry['muttype'] = info['p']
                    target.append(entry)
            context['list3'][gene] = "\n".join(gene_lines)
    else:
        cnv_size = 0
## MSI - only evaluated when the sample type is 't'.
# Reads the second line of <output_dir>/MSI/<name>.msi (tab-separated):
# field 0 is stored as msi_count, field 2 is divided by 100 and rounded to
# two decimals as msi_value.  A value of 0.3 or more is reported as MSI-H.
# NOTE(review): indentation was lost in this copy of the file; the extent of
# this `if` is reconstructed as covering only the MSI statements - confirm.
if Sample_type == 't':
    msi_file = ''.join([output_dir, '/MSI/', name, '.msi'])
    # Context manager so the handle is closed (it was previously leaked).
    with open(msi_file, 'r') as msi_fh:
        msi = (msi_fh.readlines()[1]).split("\t")
    context['msi_count'] = msi[0]
    context['msi_value'] = round(float(msi[2].strip()) / 100, 2)
    if context['msi_value'] >= 0.3:
        context['msi_result'] = 'MSI-H'
        context['msi_predict'] = '对免疫检查点抑制剂可能敏感'
    else:
        context['msi_result'] = 'MSS'
        context['msi_predict'] = '对免疫检查点抑制剂可能不敏感'
## MMR processing.
# Reads <output_dir>/MMR/<name>_mmr.txt (tab-separated, first line is a
# header).  Columns by position: 0 gene, 1 transcript, 2 nacid, 3 aacid,
# 4 freq, 5 muttype, 6 sig.  Every row is added to context['mmr']; rows whose
# sig is not '意义未明突变' are counted and listed in the summary.
# NOTE(review): indentation was lost in this copy of the file; this section
# is reconstructed as top-level (not nested under the MSI `if`) - confirm.
mmr_file = ''.join([output_dir, '/MMR/', name, "_mmr.txt"])
mmr_size = os.path.getsize(mmr_file)
mmr_result = 0
mmr_result_summary = []
if mmr_size > 0:
    # Context manager so the handle is closed (it was previously leaked).
    with open(mmr_file, 'r', encoding='utf-8') as mmr_fh:
        mmr_lines = mmr_fh.readlines()[1:]
    for line in mmr_lines:
        fields = line.strip().split("\t")
        mmr = {
            'gene': fields[0],
            'transcript': fields[1],
            'nacid': fields[2],
            'aacid': fields[3],
            'muttype': fields[5],
            'freq': fields[4],
            'sig': fields[6],
        }
        if mmr['sig'] != '意义未明突变':
            mmr_result += 1
            mmr_result_summary.append(mmr['gene'] + ' ' + mmr['aacid'])
        context['mmr'].append(mmr)
context['mmr_result'] = mmr_result
mmr_result_summary = ' | '.join(mmr_result_summary)
if mmr_result_summary:
    # Fixed: this used to read the undefined name `mrr_result_summary`,
    # raising NameError whenever a reportable MMR variant was present.
    context['mmr_result_summary'] = mmr_result_summary
    context['mmr_predict'] = "对免疫检查点抑制剂可能敏感"
else:
    context['mmr_result_summary'] = "未检测到相关基因突变"
    context['mmr_predict'] = "对免疫检查点抑制剂可能不敏感"
## chemo
# Part 1: <name>.drug.res.txt (tab-separated, first line is a header).
# Rows are laid out two per table line: each context['chemo'] entry holds
# bool1/name1/result1 for the first row of a pair and, when present,
# bool2/name2/result2 for the second.  Column 0 is the drug name, column 3
# the category ('推荐' / '常规' / '谨慎') and column 4 the result text.
# NOTE(review): indentation was lost in this copy of the file; the nesting
# here is reconstructed from statement order.
chemo_file = ''.join([output_dir, '/chemo/', name, ".drug.res.txt"])
# Context manager so the handle is closed (it was previously leaked).
with open(chemo_file, 'r') as chemo_fh:
    chemos = chemo_fh.readlines()[1:]
chemo_result = 0  # number of rows in the '推荐' category
recommend_drug = []
normal_drug = []
restrict_drug = []
drugs_by_category = {'推荐': recommend_drug, '常规': normal_drug, '谨慎': restrict_drug}
for first in range(0, len(chemos), 2):
    chemo = {}
    # slot is 1 for the first row of the pair and 2 for the second.
    for slot, position in enumerate(range(first, min(first + 2, len(chemos))), start=1):
        fields = chemos[position].strip().split("\t")
        suffix = str(slot)
        chemo['bool' + suffix] = position + 1  # 1-based row number
        chemo['name' + suffix] = fields[0]
        chemo['result' + suffix] = fields[4]
        category = fields[3]
        if category == '推荐':
            chemo_result += 1
        if category in drugs_by_category:
            drugs_by_category[category].append(fields[0])
    context['chemo'].append(chemo)
context['chemo_result'] = chemo_result

# Part 2: <name>.drug.infos.txt - per-drug detail rows grouped by '药物'.
chemo_detail = []
chemo_detail_file = ''.join([output_dir, '/chemo/', name, ".drug.infos.txt"])
chemo_data = pd.read_table(chemo_detail_file, sep="\t")
chemo_drugs = chemo_data['药物'].drop_duplicates()
context['recommend_drug'] = ','.join(recommend_drug)
context['normal_drug'] = ','.join(normal_drug)
context['restrict_drug'] = ','.join(restrict_drug)
for drug in chemo_drugs:
    detail_rows = []
    for _, row in chemo_data[chemo_data['药物'] == drug].iterrows():
        detail_rows.append({
            'gene': row['检测基因'],
            'site': row['检测位点'],
            'gt': row['基因型'],
            'level': row['证据等级'],
            'sig': row['用药提示'],
        })
    chemo_detail.append({'drug': drug, 'info': detail_rows})
context['chemo_detail'] = chemo_detail
# Publish the sensitive/resistant drug lists gathered by the variant sections.
context['sensitive_resistant_drug'] = sensitive_resistant_drug
## Combination chemotherapy regimens.
# Groups the rows of <name>.chemo.comb.txt by the '癌种' column; each group
# becomes {'type': <癌种 value>, 'drug': [regimen dicts]} in first-seen order.
chemo_comb_file = ''.join([output_dir, '/chemo/', name, ".chemo.comb.txt"])
chemo = pd.read_table(chemo_comb_file, sep="\t")
chemo_comb = []
for cancer_kind in chemo['癌种'].drop_duplicates():
    group = {'type': cancer_kind, 'drug': []}
    chemo_comb.append(group)
    for _, regimen in chemo[chemo['癌种'] == cancer_kind].iterrows():
        group['drug'].append({
            'name': regimen['用药方案'],
            'abbr': regimen['方案缩写'],
            'sig': regimen['临床提示'],
        })
context['chemo_comb'] = chemo_comb
## hereditary cancer
# Runs only when <name>.hereditary.pre.txt exists.
#   <name>.hereditary.txt (tab-separated, header line): columns 0..4 are
#     stored as gene, syndrome, hereditary_type, type and result; the number
#     of ';'-separated items in column 4 is added to hereditary_result.
#   <name>.risk.txt (tab-separated, header line): column 0 is a type name and
#     column 1 its risk ('偏高' or '同一般人群').  Rows are laid out two per
#     table line as type1/risk1 and type2/risk2.
# NOTE(review): indentation was lost in this copy of the file; the nesting
# here is reconstructed from statement order.
if os.path.exists(''.join([output_dir, '/hereditary/', name, '.hereditary.pre.txt'])):
    context['hereditary_cancer_1'] = []
    context['hereditary_cancer_2'] = []
    hereditary_file1 = ''.join([output_dir, '/hereditary/', name, ".hereditary.txt"])
    hereditary_file2 = ''.join([output_dir, '/hereditary/', name, ".risk.txt"])
    hereditary_result = 0
    hereditary_result_summary = []
    hereditary_disease = []
    if os.path.getsize(hereditary_file1) > 0:
        # Opened once, inside a context manager; the file used to be opened
        # twice and neither handle was closed.
        with open(hereditary_file1, 'r') as hereditary_file1_fh:
            variant_lines = hereditary_file1_fh.readlines()[1:]
        for line in variant_lines:
            fields = line.strip().split("\t")
            hereditary_cancer_1 = {
                'gene': fields[0],
                'syndrome': fields[1],
                'hereditary_type': fields[2],
                'type': fields[3],
                'result': fields[4],
            }
            hereditary_result += len(fields[4].split(";"))
            hereditary_result_summary.append(hereditary_cancer_1['gene'] + ' ' + hereditary_cancer_1['result'])
            hereditary_disease.append(hereditary_cancer_1['syndrome'])
            context['hereditary_cancer_1'].append(hereditary_cancer_1)
    context['hereditary_result'] = hereditary_result
    if hereditary_result_summary:
        context['hereditary_disease'] = ';'.join(hereditary_disease)
        context['hereditary_result_summary'] = ' | '.join(hereditary_result_summary)
    else:
        context['hereditary_disease'] = '/'
        context['hereditary_result_summary'] = '未检测到相关基因突变'

    hereditary_risk = []  # names of every type whose risk is '偏高'
    with open(hereditary_file2, 'r') as hereditary_file2_fh:
        heres = hereditary_file2_fh.readlines()[1:]
    for first in range(0, len(heres), 2):
        hereditary_cancer_2 = {}
        # slot is 1 for the first row of the pair and 2 for the second.
        for slot, position in enumerate(range(first, min(first + 2, len(heres))), start=1):
            fields = heres[position].strip().split("\t")
            hereditary_cancer_2['type%d' % slot] = fields[0]
            if fields[1] == '偏高':
                # Fixed: elevated risk in the second column of a pair was
                # shown in the table but left out of the summary list.
                hereditary_risk.append(fields[0])
                hereditary_cancer_2['risk%d' % slot] = RichText('偏高', color='FF0000')
            elif fields[1] == '同一般人群':
                hereditary_cancer_2['risk%d' % slot] = RichText('同一般人群')
        context['hereditary_cancer_2'].append(hereditary_cancer_2)
    if hereditary_risk:
        context['hereditary_risk'] = ','.join(hereditary_risk) + '风险可能较高'
    else:
        context['hereditary_risk'] = '风险同一般人群'
    # Placeholder row when no hereditary variant was listed.
    if len(context['hereditary_cancer_1']) == 0:
        context['hereditary_cancer_1'] = [{'gene': '/', 'syndrome': '/', 'hereditary_type': '/', 'type': '/', 'result': '/'}]
## Potentially beneficial drugs: count the distinct entries that appear in
## the drug_A / drug_B / drug_C columns of context['list1'] (drug_D is not
## counted).  '/' marks an empty column.
total_drug_count = []
for variant in context['list1']:
    for tier in ('drug_A', 'drug_B', 'drug_C'):
        if variant[tier] == '/':
            continue
        for drug_line in variant[tier].split("\n"):
            if drug_line not in total_drug_count:
                total_drug_count.append(drug_line)
context['total_drug_count'] = len(total_drug_count)
## De-duplication.
# clingenes1: distinct entries of clingene1, first occurrence kept.
# clingenes2: distinct entries of clingene2 that do not already appear in
# clingenes1.
unique_tier1 = []
for entry in context['clingene1']:
    if entry in unique_tier1:
        continue
    unique_tier1.append(entry)
context['clingenes1'] = unique_tier1

unique_tier2 = []
for entry in context['clingene2']:
    if entry in unique_tier2 or entry in unique_tier1:
        continue
    unique_tier2.append(entry)
context['clingenes2'] = unique_tier2

## Number of detected gene variants.
context['total_mut_count'] = len(unique_tier1) + len(unique_tier2)
## Genes with FDA/NMPA/NCCN-approved testing for this cancer type.
# indication.txt is tab-separated with a header line: column 0 is the gene,
# column 1 the descriptive content.  The result cell shows the gene's
# context['list3'] summary in red when one exists, otherwise '未检出变异'.
# Context manager so the handle is closed (it was previously leaked).
with open(indication_file, 'r', encoding='utf-8') as indication_fh:
    indication_lines = indication_fh.readlines()[1:]
for line in indication_lines:
    fields = line.strip().split("\t")
    indication = {'gene': fields[0], 'content': fields[1]}
    if indication['gene'] in context['list3']:
        indication['result'] = RichText(context['list3'][indication['gene']], color='FF0000')
    else:
        indication['result'] = '未检出变异'
    context['indication'].append(indication)
## QC processing.
# <name>_qc.txt is read as a two-column, header-less table indexed by its
# first column.  Three metrics are checked against fixed thresholds and each
# is marked '合格' or '警戒':
#   'Q30(%)'                >= 85
#   'mean_depth(dedup)'     >= 1000 for sample type 'c', >= 500 for 't'
#   'coverage(>=0.2*meanx)' >= 90
# The overall qc_result is '合格' only when all three are.
qc_file = ''.join([output_dir, '/qc/', name, '_qc.txt'])
qc = pd.read_table(qc_file, sep="\t", header=None, index_col=0, names=['A', 'B'])

Q30 = qc.loc['Q30(%)', 'B']
Q30_result = '合格' if Q30 >= 85 else '警戒'

depth = qc.loc['mean_depth(dedup)', 'B']
depth_thresholds = {'c': 1000, 't': 500}
if Sample_type not in depth_thresholds:
    # Previously depth_result stayed undefined for any other sample type and
    # the script died later with an unrelated-looking NameError.
    sys.exit("unsupported sample_type %r (expected 't' or 'c')" % (Sample_type,))
depth_result = '合格' if depth >= depth_thresholds[Sample_type] else '警戒'

uniformity = qc.loc['coverage(>=0.2*meanx)', 'B']
uniformity_result = '合格' if uniformity >= 90 else '警戒'

if Q30_result == '合格' and depth_result == '合格' and uniformity_result == '合格':
    context['qc_result'] = '合格'
else:
    context['qc_result'] = '警戒'
context['Q30'] = Q30
context['Q30_result'] = Q30_result
context['depth'] = depth
context['uniformity'] = uniformity
context['depth_result'] = depth_result
context['uniformity_result'] = uniformity_result
## Negative-result handling: every table that ended up empty gets a single
## placeholder row of '/' values.
## list1
if not context['list1']:
    context['list1'] = [{'gene': '/', 'freq': '/', 'drug_A': '/', 'drug_B': '/', 'drug_C': '/', 'drug_D': '/'}]
## list2 (the matching genefunc placeholder is added together with it)
if not context['list2']:
    context['list2'] = {'/': [{'drug': '/', 'effect': '/', 'tumor': '/', 'evidence': '/', 'sig': '/'}]}
    context['genefunc']['/'] = '/'
## clingenes, nonclingenes
for table_key in ('clingenes1', 'clingenes2', 'nonclingenes'):
    if not context[table_key]:
        context[table_key] = [{'gene': '/', 'transcript': '/', 'nacid': '/', 'aacid': '/', 'exon': '/', 'muttype': '/', 'freq': '/'}]
## sensitive_resistant_drug
for drug_group in (context['sensitive_resistant_drug'][0], context['sensitive_resistant_drug'][1]):
    if not drug_group['drug']:
        drug_group['drug'] = [{'name': '/', 'mechanism': '/'}]
## mmr
if not context['mmr']:
    context['mmr'] = [{'gene': '/', 'transcript': '/', 'nacid': '/', 'aacid': '/', 'muttype': '/', 'freq': '/', 'sig': '/'}]
## info
# Patient / sample header of the report.  Every field defaults to '/'
# (sample_id to the sample name, both dates to today) and is overridden from
# the first record of <name>_post.json when that file exists.
post_file = ''.join([output_dir, '/qc/', name, '_post.json'])
sex = '/'
age = '/'
phone = '/'
medical_history = '/'
family_history = '/'
sample_id = name
sample_type = '/'
report_date = time.strftime("%Y-%m-%d", time.localtime())
arrival_date = report_date
cancer_type = '/'
pathologic_diagnosis = '/'
if os.path.isfile(post_file):
    # Context manager so the handle is closed (it was previously leaked).
    with open(post_file, 'r') as post_fh:
        post = json.load(post_fh)
    record = post["data"][0]
    name = record["name"]
    sex = record["gender"]
    age = record["age"]
    medical_history = record["treatHistory"]
    family_history = record["sickFamilyHistory"]
    sample_id = record["barcode"]
    sample_type = record["source"]
    # Keep only the part of receiveTime before the first space.
    arrival_date = record["receiveTime"].split(' ')[0]
    cancer_type = record["zlType"]
    pathologic_diagnosis = record["treatResult"]
context['info'] = {
    'name': name,
    'sex': sex,
    'age': age,
    'phone': phone,
    'medical_history': medical_history,
    'family_history': family_history,
    'sample_id': sample_id,
    'sample_type': sample_type,
    'report_date': report_date,
    'arrival_date': arrival_date,
    'cancer_type': cancer_type,
    'pathologic_diagnosis': pathologic_diagnosis}
context['report_time'] = report_date
## Template rendering.
# The .docx template is looked up next to this script (symlinks resolved),
# chosen by project name and sample type, rendered with the collected
# context and written to report_file.
script_dir = os.path.dirname(os.path.realpath(sys.argv[0]))
report_template = {
    'lung85gene': {'t': 'lung85-tissue-oem.docx', 'c': 'lung85-blood-oem.docx'},
    'crc88gene': {'t': 'CRC88-tissue-oem.docx', 'c': 'CRC88-blood-oem.docx'},
}
# An unknown project or sample type raises KeyError here.
template_name = report_template[projcet][Sample_type]
doc = DocxTemplate(os.path.join(script_dir, template_name))
doc.render(context)
doc.save(report_file)