#!/usr/bin/env python3
"""Report QC metrics that fall below fixed thresholds, plus gender-check status.

Provenance: codes/qc_check.py as added by commit 055078fc (chaopower, 2024-02-22).
"""

import argparse
import json
import os.path

import pandas as pd

# Each rule is (metric name in the QC file, minimum accepted value, note written
# after the offending value).  A metric fails when float(value) < minimum.
# NOTE: an earlier draft of these limits also listed UMI-specific depth/coverage
# thresholds (1000) and concordance/contamination limits; none are enforced here.
_TUMOR_RULES = (
    ("Q30(%)", 85, "(<85%)"),
    ("clean_reads_rate(%)", 90, "(<90%)"),
    ("mapped_rate(%)", 90, "(<90%)"),
    ("capture_rate(reads)", 50, "(<50%)"),
    ("mean_depth(dedup)", 500, "(<500X)"),
    ("coverage(>=80%)", 500, "(<500X)"),
    ("coverage(>=0.2*meanx)", 95, "(<95%)"),
    ("coverage(>10x)", 95, "(<95%)"),
)

# The normal sample has no mean-depth rule and a lower 80%-coverage limit.
_NORMAL_RULES = (
    ("Q30(%)", 85, "(<85%)"),
    ("clean_reads_rate(%)", 90, "(<90%)"),
    ("mapped_rate(%)", 90, "(<90%)"),
    ("capture_rate(reads)", 50, "(<50%)"),
    ("coverage(>=80%)", 100, "(<100X)"),
    ("coverage(>=0.2*meanx)", 95, "(<95%)"),
    ("coverage(>10x)", 95, "(<95%)"),
)


def _path_exists(path):
    """Return True only for a non-empty path that exists (None and '' are absent)."""
    return bool(path) and os.path.exists(path)


def _load_qc_table(qc_file):
    """Read a two-column, tab-separated ``metric<TAB>value`` file.

    The result is transposed so that every metric becomes a column and the
    value sits in the row labelled ``1``.
    """
    return pd.read_csv(qc_file, sep="\t", index_col=0, header=None).T


def _write_failures(out, label, table, rules):
    """Write one tab-separated line to *out* for every metric below its minimum.

    Raises:
        KeyError: if a metric named in *rules* is missing from *table*.
    """
    for metric, minimum, note in rules:
        value = table[metric][1]
        if float(value) < minimum:
            out.write('\t'.join((label, metric, str(value), note)) + "\n")


def _read_cms_gender(cms):
    """Return ``data[0]['gender']`` from the CMS JSON file, or '' if unavailable."""
    if not _path_exists(cms):
        return ''
    with open(cms, "r", encoding="utf-8") as handle:
        payload = json.load(handle)
    records = payload.get('data', [])
    if records:
        return records[0].get('gender', '')
    return ''


def _gender_message(genderf, cms):
    """Build the gender-check line, or return None when nothing must be written.

    NOTE(review): the branch nesting was reconstructed from a
    whitespace-collapsed patch -- confirm against the original file.
    """
    if not _path_exists(genderf):
        return '\t'.join(("性别鉴定:", '流程未进行性别鉴定评估'))

    # Only the first data row of the gender file is considered.
    calls = pd.read_csv(genderf, sep='\t').to_dict(orient='records')[0]
    gender_cms = _read_cms_gender(cms)
    if not gender_cms:
        return '\t'.join(("性别鉴定:", 'lims系统提供信息有误,请核查'))

    calls['gender_cms'] = gender_cms
    values = list(calls.values())
    # A value that occurs exactly once disagrees with every other source.
    if any(values.count(value) == 1 for value in values):
        return '\t'.join(("性别鉴定不一致:", str(calls)))
    return None


def check(outputf, tumorf, normalf=None, genderf=None, cms=None):
    """Write every failed QC check to *outputf*.

    Args:
        outputf: Path of the report to create (overwritten, UTF-8 encoded).
        tumorf: Tab-separated ``metric<TAB>value`` QC file of the tumor sample.
        normalf: Optional QC file of the normal sample; skipped when None,
            empty, or not present on disk.
        genderf: Optional tab-separated gender file with a header row; when it
            is absent a "not assessed" line is written instead.
        cms: Optional JSON file whose ``data[0]['gender']`` is compared with
            the values in *genderf*.

    Raises:
        KeyError: if a QC file lacks one of the expected metrics.
        ValueError: if a metric value cannot be converted to float.
    """
    # Parse the tumor file first so a bad input does not leave an empty report.
    tumor_table = _load_qc_table(tumorf)

    # The context manager closes the report even when a later check raises.
    with open(outputf, 'w', encoding='utf-8') as out:
        _write_failures(out, "Tumor:", tumor_table, _TUMOR_RULES)

        if _path_exists(normalf):
            _write_failures(out, "Normal:", _load_qc_table(normalf), _NORMAL_RULES)

        message = _gender_message(genderf, cms)
        if message is not None:
            out.write(message)


def main():
    """Parse command-line arguments and run :func:`check`."""
    parser = argparse.ArgumentParser(description="Check qcfile")
    parser.add_argument('-n', '--barcode', help="sample's qcfile", required=True)
    # nargs='?' lets a flag be passed without a value, which yields None.
    parser.add_argument('-s', '--normal', help="normal's file", default='', required=False, nargs='?')
    parser.add_argument('-p', '--gender', help="gender file", default='', required=False, nargs='?')
    parser.add_argument('-c', '--cms', help="cms's file", default='', required=False, nargs='?')
    parser.add_argument('-o', '--output_file', help="Output file for the QC failure report", required=True)

    args = parser.parse_args()
    check(args.output_file, args.barcode, args.normal, args.gender, args.cms)


if __name__ == '__main__':
    main()