pipeline/codes/qc_check.py

101 lines
4.8 KiB
Python
Executable File
Raw Permalink Blame History

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

#!/usr/bin/env python3
import argparse
import json
import os.path
import pandas as pd
# Minimum acceptable value per metric, as (metric name, minimum, label written
# to the report).  A metric is reported when its value is strictly below the
# minimum.  The legacy reference list kept with this function also named
# UMI-specific depth/coverage limits, 'coverage(>=0.1*meanx)', 'concordence'
# and contamination limits, none of which are enforced here, and it listed
# 'coverage(>10x)' as 90 while the enforced minimum is 95.
_TUMOR_THRESHOLDS = (
    ('Q30(%)', 85, "(<85%)\n"),
    ('clean_reads_rate(%)', 90, "(<90%)\n"),
    ('mapped_rate(%)', 90, "(<90%)\n"),
    ('capture_rate(reads)', 50, "(<50%)\n"),
    ('mean_depth(dedup)', 500, "(<500X)\n"),
    ('coverage(>=80%)', 500, "(<500X)\n"),
    ('coverage(>=0.2*meanx)', 95, "(<95%)\n"),
    ('coverage(>10x)', 95, "(<95%)\n"),
)
_NORMAL_THRESHOLDS = (
    ('Q30(%)', 85, "(<85%)\n"),
    ('clean_reads_rate(%)', 90, "(<90%)\n"),
    ('mapped_rate(%)', 90, "(<90%)\n"),
    ('capture_rate(reads)', 50, "(<50%)\n"),
    ('coverage(>=80%)', 100, "(<100X)\n"),
    ('coverage(>=0.2*meanx)', 95, "(<95%)\n"),
    ('coverage(>10x)', 95, "(<95%)\n"),
)


def _path_exists(path):
    """Return True when *path* is a non-empty value naming an existing path."""
    # None and '' both mean "not supplied"; os.path.exists(None) would raise
    # TypeError, so the emptiness test has to come first.
    return bool(path) and os.path.exists(str(path))


def _read_qc_table(path):
    """Load a headerless, tab-separated QC file with metric names as columns."""
    return pd.read_csv(path, sep="\t", index_col=0, header=None).T


def _report_below_threshold(out, label, table, thresholds):
    """Write one tab-separated line per metric whose value is below its minimum."""
    for metric, minimum, note in thresholds:
        # After the transpose the single data row carries the label 1.
        value = table[metric][1]
        if float(value) < minimum:
            out.write('\t'.join((label, metric, str(value), note)))


def _report_gender(out, genderf, cms):
    """Write the gender-consistency message, if any, to *out*."""
    if not _path_exists(genderf):
        # Message: the pipeline did not run a gender assessment.
        out.write('\t'.join(("性别鉴定:", '流程未进行性别鉴定评估')))
        return

    # Only the first record of the gender table is used.
    calls = pd.read_csv(genderf, sep='\t').to_dict(orient='records')[0]

    gender_cms = ''
    if _path_exists(cms):
        with open(cms, "r") as cms_handle:
            payload = json.load(cms_handle)
        records = payload.get('data', [])
        if records:
            gender_cms = records[0].get('gender', '')

    # NOTE(review): the LIMS warning is emitted whenever no CMS gender could be
    # read (missing file, empty 'data', or empty 'gender') -- confirm this
    # pairing against the intended branch nesting.
    if not gender_cms:
        # Message: the information supplied by LIMS is wrong, please check.
        out.write('\t'.join(("性别鉴定:", 'lims系统提供信息有误请核查')))
        return

    calls['gender_cms'] = gender_cms
    values = list(calls.values())
    # Any value that occurs exactly once disagrees with all the other calls.
    disagreeing = [key for key, value in calls.items() if values.count(value) == 1]
    if disagreeing:
        # Message: gender calls are inconsistent, followed by every call.
        out.write('\t'.join(("性别鉴定不一致:", str(calls))))


def check(outputf, tumorf, normalf=None, genderf=None, cms=None):
    """Write QC warnings for a tumor (and optional normal) sample to *outputf*.

    Each metric that falls below its minimum produces one tab-separated line
    ``<sample label>  <metric>  <value>  <limit>``.  A gender-consistency
    message is appended afterwards when applicable.

    Args:
        outputf: Path of the report file; it is created or truncated.
        tumorf: Tab-separated, headerless QC file of the tumor sample with
            the metric name in the first column.
        normalf: Same format for the normal sample.  ``None``, ``''`` or a
            path that does not exist skips the normal checks.
        genderf: Tab-separated gender table with a header row.  ``None``,
            ``''`` or a missing path yields the "no gender assessment" message.
        cms: JSON file read as ``{"data": [{"gender": ...}, ...]}``; only the
            first entry of ``data`` is consulted.

    Raises:
        KeyError: If a required metric is absent from a QC file.
        ValueError: If a metric value cannot be converted to ``float``.
    """
    tumor_qc = _read_qc_table(tumorf)
    # The context manager guarantees the report is flushed and closed even
    # when a metric lookup fails part-way through.
    with open(outputf, 'w') as out:
        _report_below_threshold(out, "Tumor:", tumor_qc, _TUMOR_THRESHOLDS)
        if _path_exists(normalf):
            normal_qc = _read_qc_table(normalf)
            _report_below_threshold(out, "Normal:", normal_qc, _NORMAL_THRESHOLDS)
        _report_gender(out, genderf, cms)
if __name__ == '__main__':
    # Command-line entry point: parse the file paths and run the QC check.
    parser = argparse.ArgumentParser(description="Check qcfile")
    parser.add_argument('-n', '--barcode', help="sample's qcfile", required=True)
    # nargs='?' allows each optional flag to appear without a value.  argparse
    # stores `const` in that case, which defaults to None; const='' makes a
    # bare flag equivalent to omitting it.
    parser.add_argument('-s', '--normal', help="normal's file", default='', const='', required=False, nargs='?')
    parser.add_argument('-p', '--gender', help="gender file", default='', const='', required=False, nargs='?')
    parser.add_argument('-c', '--cms', help="cms's file", default='', const='', required=False, nargs='?')
    # The value is opened for writing as a file by check(), not used as a directory.
    parser.add_argument('-o', '--output_file', help="Output file for QC warnings", required=True)
    args = parser.parse_args()
    check(args.output_file, args.barcode, args.normal, args.gender, args.cms)