master
parent
ab17cb58df
commit
2d4b2ad7a2
|
|
@ -0,0 +1,103 @@
|
|||
#!/usr/bin/env python3
# -*-coding:utf-8-*-
"""Aggregate sequencing QC metrics into one tab-separated report.

Positional arguments (exactly five):
    1. fastp JSON report
    2. bamdst output directory for the deduplicated (rmdup) BAM
    3. bamdst output directory for the sorted (raw) BAM
    4. genecore/gencore JSON report (consensus & duplication stats)
    5. path of the QC table to write

Each output row is ``<metric_name>\\t<value>``.
"""
import json
import os
import re
import sys

import numpy as np
import pandas as pd


def _parse_coverage_report(path):
    """Parse a bamdst ``coverage.report`` into ``{metric_name: value_str}``.

    Lines starting with ``#`` are comments; data lines are
    ``<name>\\t<value>``. Values are kept as strings (some carry a
    trailing ``%``). Short/blank lines are skipped defensively.
    """
    metrics = {}
    with open(path, 'r') as handle:
        for line in handle:
            if re.match(r'^#', line):
                continue
            fields = line.strip().split('\t')
            if len(fields) >= 2:
                metrics[fields[0]] = fields[1]
    return metrics


def _fraction_at_depth_line(plot_path, line_no):
    """Return column 5 (1-based) of 1-based line *line_no* of a bamdst
    ``depth_distribution.plot`` file, as a float.

    Replaces the original ``os.popen("sed -n Np ... | cut -f5")`` shell
    pipeline: pure Python, no shell dependency, and an explicit error
    instead of ``float('')`` when the line does not exist.
    """
    if line_no < 1:
        raise ValueError(
            "depth-distribution line number must be >= 1, got %d" % line_no)
    with open(plot_path, 'r') as handle:
        for idx, line in enumerate(handle, start=1):
            if idx == line_no:
                return float(line.rstrip('\n').split('\t')[4])
    raise ValueError("line %d not found in %s" % (line_no, plot_path))


def main(argv=None):
    """Read the four QC inputs named in *argv* and write the report."""
    argv = sys.argv if argv is None else argv
    if len(argv) != 6:
        print("usage:python3 qc.py fastp_json_file bamdst_out sort_bamdst_out genecore_json output ")
        sys.exit(1)  # BUG FIX: was sys.exit() (status 0) on bad usage

    fastp_json = argv[1]
    rmdup_bamdst_out = argv[2]
    sort_bamdst_out = argv[3]
    genecore_json = argv[4]
    out_path = argv[5]

    with open(fastp_json, 'r') as handle:
        fastp = json.load(handle)

    # raw-BAM and dedup-BAM bamdst summaries, keyed by metric name
    bamdst = _parse_coverage_report(
        os.path.join(sort_bamdst_out, 'coverage.report'))
    rmdup_bamdst = _parse_coverage_report(
        os.path.join(rmdup_bamdst_out, 'coverage.report'))

    with open(genecore_json, 'r') as handle:
        rmdup_js = json.load(handle)

    before = fastp["summary"]["before_filtering"]
    after = fastp["summary"]["after_filtering"]

    with open(out_path, 'w') as out:

        def _row(name, value):
            # every report row is "<name>\t<value>\n"
            out.write(name + "\t" + str(value) + "\n")

        _row("raw_reads", before["total_reads"])
        _row("raw_bases", before["total_bases"])
        _row("clean_reads", after["total_reads"])
        _row("clean_bases", after["total_bases"])
        _row("clean_reads_rate(%)",
             '%.2f' % (after["total_reads"] / before["total_reads"] * 100))
        _row("Q20(%)", '%.2f' % (before["q20_rate"] * 100))
        _row("Q30(%)", '%.2f' % (before["q30_rate"] * 100))
        _row("mapped_reads", bamdst["[Total] Mapped Reads"])
        # mapped rate is relative to the post-filter (clean) read count
        _row("mapped_rate(%)", '%.2f' % (
            int(bamdst["[Total] Mapped Reads"])
            / after["total_reads"] * 100))
        _row("dup_rate(%)",
             '%.2f' % (rmdup_js["summary"]["duplication_rate"] * 100))

        _row("probe_bed_size", bamdst["[Target] Len of region"])
        _row("target_reads", bamdst["[Target] Target Reads"])
        # bamdst already reports this as a percentage string, e.g. "87.50%"
        _row("capture_rate(reads)",
             bamdst["[Target] Fraction of Target Reads in mapped reads"].replace('%', ''))
        _row("mean_depth_raw", bamdst["[Target] Average depth"])
        _row("mean_depth(dedup)", rmdup_bamdst["[Target] Average depth(rmdup)"])
        _row("coverage(>0x)",
             rmdup_bamdst["[Target] Coverage (>0x)"].replace('%', ''))
        _row("coverage(>10x)",
             rmdup_bamdst["[Target] Coverage (>=10x)"].replace('%', ''))
        _row("coverage(>30x)",
             rmdup_bamdst["[Target] Coverage (>=30x)"].replace('%', ''))

        # uniformity: fraction of target bases covered at >= 0.2 * mean depth;
        # the plot file's Nth line holds the cumulative fraction at depth N-1
        # in column 5 (same row the original sed/cut pipeline selected).
        uniformity = round(float(rmdup_bamdst["[Target] Average depth"]) * 0.2)
        frac = _fraction_at_depth_line(
            os.path.join(rmdup_bamdst_out, 'depth_distribution.plot'),
            uniformity)
        _row("coverage(>=0.2*meanx)", '%.2f' % (frac * 100))

        # depth reached by at least 80% of target positions
        # (= 20th percentile of the per-base rmdup depth column)
        depth_tsv_df = pd.read_csv(
            os.path.join(rmdup_bamdst_out, 'depth.tsv.gz'),
            compression='gzip', sep="\t")
        coverage_gtoe_80 = round(
            np.percentile(np.array(depth_tsv_df['Rmdup depth']), 20))
        _row("coverage(>=80%)", coverage_gtoe_80)

        # insert-size distribution: column 0 = size, column 1 = count
        insert_df = pd.read_csv(
            os.path.join(rmdup_bamdst_out, 'insertsize.plot'),
            sep="\t", header=None)
        insert_peak = insert_df.loc[insert_df.loc[:, 1].idxmax(), 0]
        insert_avg = ((insert_df.loc[:, 0] * insert_df.loc[:, 1]).sum()
                      / insert_df.loc[:, 1].sum())
        _row("insertsize_peak", insert_peak)
        _row("insertsize_avg", int(insert_avg))

        summary = rmdup_js["summary"]
        _row("uniq_family_count(ss)",
             summary["single_stranded_consensus_sequence"]
             + summary["duplex_consensus_sequence"])

        # histogram bin i = number of reads in families of size i+1;
        # gtoe2/gtoe3 = fraction of reads in families of size >=2 / >=3
        dup_list = rmdup_js["before_processing"]["duplication_level_histogram"]
        total = sum(dup_list)
        gtoe2 = (total - dup_list[0]) / total
        gtoe3 = (total - dup_list[0] - dup_list[1]) / total
        _row("family_size_gtoe2(%)", '%.2f' % (gtoe2 * 100))
        _row("family_size_gtoe3(%)", '%.2f' % (gtoe3 * 100))


if __name__ == "__main__":
    main()
|
||||
|
|
@ -32,7 +32,7 @@ task run_msi {
|
|||
|
||||
elif [ "${probe}" == "160" ]; then
|
||||
|
||||
echo "msi 624 探针 "
|
||||
echo "msi 160 探针 "
|
||||
msisensor2 msi \
|
||||
-M /dataseq/jmdna/software/msisensor2/models_hg19_GRCh37 \
|
||||
-t -e $PUBLIC/msi/624_650_160_intersect_74.bed \
|
||||
|
|
|
|||
Loading…
Reference in New Issue