#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Collect failed QC metrics for a tumor/normal tissue pair.

Usage:
    python3 qc_check_tissue.py output_dir tumor normal name

Inputs, all read from ``<output_dir>/qc/``:
    ``<tumor>_qc.txt``, ``<normal>_qc.txt``
        Tab-separated files without a header; the first column is the metric
        name and the second column its value.
    ``<name>_concordance.txt``
        First line must contain ``Concordance: <number>``.
    ``<name>_contamination.txt``
        Line 1: ``Normal sample contamination level: <number>%``;
        line 2: ``Tumor sample contamination level: <number>%``.

Output:
    ``<output_dir>/qc/qc_fail.txt`` with one line per metric that violates
    its threshold (the file is empty when everything passes).
"""
import os
import re
import sys

import pandas as pd
from pandas import DataFrame, Series

USAGE = "usage:python3 qc_check_tissue.py output_dir tumor normal name"

# Each entry: (metric name in the QC file, minimum accepted value,
# note appended to the failure line).
TUMOR_MINIMUMS = (
    ("Q30(%)", 85, "(<85%)"),
    ("clean_reads_rate(%)", 90, "(<90%)"),
    ("mapped_rate(%)", 90, "(<90%)"),
    ("capture_rate(reads)", 50, "(<50%)"),
    ("mean_depth(dedup)", 500, "(<500X)"),
    ("coverage(>=80%)", 500, "(<500X)"),
    ("coverage(>=0.2*meanx)", 95, "(<95%)"),
    ("coverage(>10x)", 95, "(<95%)"),
)
# The normal sample has no mean_depth(dedup) check and a lower
# coverage(>=80%) minimum than the tumor sample.
NORMAL_MINIMUMS = (
    ("Q30(%)", 85, "(<85%)"),
    ("clean_reads_rate(%)", 90, "(<90%)"),
    ("mapped_rate(%)", 90, "(<90%)"),
    ("capture_rate(reads)", 50, "(<50%)"),
    ("coverage(>=80%)", 100, "(<100X)"),
    ("coverage(>=0.2*meanx)", 95, "(<95%)"),
    ("coverage(>10x)", 95, "(<95%)"),
)

MIN_CONCORDANCE = 99    # percent; lower values are reported
MAX_CONTAMINATION = 1   # percent; higher values are reported

# Value of the tumor "gender" row that counts as a pass (the text reads
# "gender consistent"); any other value is copied verbatim into the report.
GENDER_OK = '性别一致'

# Captures the whole number, with or without a fractional part.
CONCORDANCE_RE = r'Concordance:\s*(\d+(?:\.\d+)?)'
NORMAL_CONTAMINATION_RE = r'Normal sample contamination level: (.*)%'
TUMOR_CONTAMINATION_RE = r'Tumor sample contamination level: (.*)%'

# NOTE: a tumor-content check used to live here but is disabled. It read the
# first field of line 2 of <output_dir>/qc/sequenza/<name>_confints_CP.txt and
# reported "Tumor content(%):" when that value was below 0.2 (<20%).


def load_qc_metrics(path):
    """Return the values of a per-sample QC file, indexed by metric name.

    The file is tab-separated without a header; column 0 becomes the index
    and column 1 is returned as a Series.
    """
    table = pd.read_csv(path, sep="\t", index_col=0, header=None)
    return table[1]


def failed_metrics(label, metrics, minimums):
    """Return report lines for every metric below its minimum.

    Args:
        label: Sample tag written as the first field ("Tumor:" / "Normal:").
        metrics: Mapping-like object from metric name to value.
        minimums: Iterable of ``(metric, minimum, note)`` triples.

    Raises:
        KeyError: if a required metric is absent from *metrics*.
        ValueError: if a metric value cannot be converted to float.
    """
    lines = []
    for metric, minimum, note in minimums:
        value = metrics[metric]
        if float(value) < minimum:
            lines.append("\t".join((label, metric, str(value), note + "\n")))
    return lines


def _first_lines(path, count):
    """Return the first *count* lines of *path* ('' for lines past EOF)."""
    with open(path, encoding="utf-8", errors="replace") as handle:
        return [handle.readline() for _ in range(count)]


def _capture(pattern, text, path):
    """Return group 1 of *pattern* in *text*, or raise a descriptive error."""
    found = re.search(pattern, text, re.M | re.I)
    if found is None:
        raise ValueError(
            "{}: expected text matching {!r}, got {!r}".format(
                path, pattern, text
            )
        )
    return found.group(1)


def main(argv=None):
    """Run the QC check and write ``qc_fail.txt``.

    Args:
        argv: ``[output_dir, tumor, normal, name]``; defaults to
            ``sys.argv[1:]``.

    Returns:
        0 on success, 1 when too few arguments were supplied.

    Raises:
        OSError: if an input file cannot be read or the report not written.
        ValueError: if the concordance or contamination file does not have
            the expected content.
    """
    args = sys.argv[1:] if argv is None else list(argv)
    # All four positional arguments are required.
    if len(args) < 4:
        print(USAGE)
        return 1
    output_dir, tumor, normal, name = args[:4]
    qc_dir = os.path.join(output_dir, "qc")

    tumor_qc = load_qc_metrics(os.path.join(qc_dir, tumor + "_qc.txt"))
    normal_qc = load_qc_metrics(os.path.join(qc_dir, normal + "_qc.txt"))

    # Concordance is on the first line of its file.
    concordance_path = os.path.join(qc_dir, name + "_concordance.txt")
    (concordance_line,) = _first_lines(concordance_path, 1)
    concordance = _capture(CONCORDANCE_RE, concordance_line, concordance_path)

    # Contamination: line 1 is the normal sample, line 2 the tumor sample.
    contamination_path = os.path.join(qc_dir, name + "_contamination.txt")
    normal_line, tumor_line = _first_lines(contamination_path, 2)
    contamination_n = _capture(
        NORMAL_CONTAMINATION_RE, normal_line, contamination_path
    )
    contamination_t = _capture(
        TUMOR_CONTAMINATION_RE, tumor_line, contamination_path
    )

    report = failed_metrics("Tumor:", tumor_qc, TUMOR_MINIMUMS)
    report += failed_metrics("Normal:", normal_qc, NORMAL_MINIMUMS)
    if float(concordance) < MIN_CONCORDANCE:
        report.append("\t".join(("concordance(%)", concordance, "(<99%)\n")))
    if float(contamination_n) > MAX_CONTAMINATION:
        report.append("\t".join((
            "Normal sample contamination level(%):",
            contamination_n,
            "(>1%)\n",
        )))
    if float(contamination_t) > MAX_CONTAMINATION:
        report.append("\t".join((
            "Tumor sample contamination level(%):",
            contamination_t,
            "(>1%)\n",
        )))
    gender = tumor_qc['gender']
    if gender != GENDER_OK:
        report.append(gender + "\n")

    # Written as UTF-8 because the gender message may be non-ASCII.
    out_file = os.path.join(qc_dir, "qc_fail.txt")
    with open(out_file, "w", encoding="utf-8") as out:
        out.writelines(report)
    return 0


if __name__ == "__main__":
    sys.exit(main())