pipeline/script/qc_check_tissue.py

89 lines
4.3 KiB
Python
Executable File

#!/usr/bin/env python3
#-*-coding:utf-8-*-
import sys
import os
import re
import pandas as pd
from pandas import DataFrame,Series
# Lower-bound QC thresholds.  Each entry is
# (metric name as it appears in <sample>_qc.txt, minimum accepted value,
#  note written next to a failing value in the report).
_TUMOR_MINIMUMS = (
    ('Q30(%)', 85, '(<85%)'),
    ('clean_reads_rate(%)', 90, '(<90%)'),
    ('mapped_rate(%)', 90, '(<90%)'),
    ('capture_rate(reads)', 50, '(<50%)'),
    ('mean_depth(dedup)', 500, '(<500X)'),
    ('coverage(>=80%)', 500, '(<500X)'),
    ('coverage(>=0.2*meanx)', 95, '(<95%)'),
    ('coverage(>10x)', 95, '(<95%)'),
)
# The normal sample has no mean-depth check and a lower coverage(>=80%) bound.
_NORMAL_MINIMUMS = (
    ('Q30(%)', 85, '(<85%)'),
    ('clean_reads_rate(%)', 90, '(<90%)'),
    ('mapped_rate(%)', 90, '(<90%)'),
    ('capture_rate(reads)', 50, '(<50%)'),
    ('coverage(>=80%)', 100, '(<100X)'),
    ('coverage(>=0.2*meanx)', 95, '(<95%)'),
    ('coverage(>10x)', 95, '(<95%)'),
)
# Value of the 'gender' metric that means the check passed
# (the text reads "gender consistent"); anything else is reported verbatim.
_GENDER_OK = '性别一致'


def _read_qc_metrics(path):
    """Load a tab-separated ``metric<TAB>value`` table as a metric -> value Series."""
    table = pd.read_csv(path, sep="\t", index_col=0, header=None)
    # Column 0 became the index, so the values live in the column labelled 1.
    return table[1]


def _read_line(path, line_number):
    """Return line ``line_number`` (1-based) of ``path``, or '' if the file is shorter."""
    with open(path, encoding="utf-8", errors="replace") as handle:
        for current, line in enumerate(handle, start=1):
            if current == line_number:
                return line
    return ''


def _extract_value(pattern, line, path):
    """Return group 1 of ``pattern`` in ``line``; raise ValueError if it does not match."""
    match = re.search(pattern, line, re.M | re.I)
    if match is None:
        raise ValueError(
            "pattern {!r} not found in {} (line read: {!r})".format(pattern, path, line))
    return match.group(1)


def _below_minimum(sample_label, metrics, minimums):
    """Return one report line for every metric whose value is below its minimum."""
    lines = []
    for metric, minimum, note in minimums:
        value = metrics[metric]
        if float(value) < minimum:
            lines.append('\t'.join((sample_label, metric, str(value), note)) + "\n")
    return lines


def main(argv=None):
    """Check tumor/normal QC metrics and write every failure to ``<output_dir>/qc/qc_fail.txt``.

    ``argv`` defaults to ``sys.argv`` and must hold, after the program name:
    ``output_dir tumor normal name``.  Inputs are read from ``<output_dir>/qc/``:
    ``<tumor>_qc.txt``, ``<normal>_qc.txt``, ``<name>_concordance.txt`` and
    ``<name>_contamination.txt``.  The report file is always (re)written and is
    empty when nothing fails.

    Exits with status 1 after printing the usage line when arguments are missing.
    Raises ValueError when the concordance/contamination lines do not have the
    expected form, and whatever ``open``/``pandas.read_csv`` raise for
    missing or malformed files.
    """
    if argv is None:
        argv = sys.argv
    # Four positional parameters are required, so argv needs five entries.
    if len(argv) < 5:
        print("usage:python3 qc_check_tissue.py output_dir tumor normal name")
        sys.exit(1)
    output_dir, tumor, normal, name = argv[1:5]
    qc_dir = os.path.join(output_dir, 'qc')

    qc_t = _read_qc_metrics(os.path.join(qc_dir, tumor + '_qc.txt'))
    qc_n = _read_qc_metrics(os.path.join(qc_dir, normal + '_qc.txt'))

    ## concordance: first line of <name>_concordance.txt
    # NOTE(review): the pattern keeps only the integer part and requires a
    # decimal point after it (e.g. "Concordance: 100%" would not match) -
    # confirm this matches the producer's output format.
    cd_file = os.path.join(qc_dir, name + '_concordance.txt')
    concordance = _extract_value(
        r'Concordance: (\d+)\.', _read_line(cd_file, 1), cd_file)

    ## contamination: normal on line 1, tumor on line 2 of <name>_contamination.txt
    ct_file = os.path.join(qc_dir, name + '_contamination.txt')
    contamination_n = _extract_value(
        r'Normal sample contamination level: (.*)%', _read_line(ct_file, 1), ct_file)
    contamination_t = _extract_value(
        r'Tumor sample contamination level: (.*)%', _read_line(ct_file, 2), ct_file)

    # NOTE: a tumor-content check (second line, first field of
    # qc/sequenza/<name>_confints_CP.txt, failing below 0.2 / 20%) used to be
    # sketched here as commented-out code and is not performed.

    # Assemble the whole report before touching the output file, so that a
    # parse error does not leave a truncated qc_fail.txt behind.
    report = _below_minimum("Tumor:", qc_t, _TUMOR_MINIMUMS)
    report.extend(_below_minimum("Normal:", qc_n, _NORMAL_MINIMUMS))
    if float(concordance) < 99:
        report.append('\t'.join(("concordance(%)", concordance, "(<99%)\n")))
    if float(contamination_n) > 1:
        report.append('\t'.join(
            ("Normal sample contamination level(%):", contamination_n, "(>1%)\n")))
    if float(contamination_t) > 1:
        report.append('\t'.join(
            ("Tumor sample contamination level(%):", contamination_t, "(>1%)\n")))
    # Only the tumor table's gender status is inspected.
    gender = qc_t['gender']
    if gender != _GENDER_OK:
        report.append(gender + "\n")

    out_file = os.path.join(qc_dir, 'qc_fail.txt')
    # Explicit UTF-8: the gender message may contain non-ASCII text.
    with open(out_file, 'w', encoding='utf-8') as out:
        out.writelines(report)


if __name__ == "__main__":
    main()