From c55811e306f7ef68d212767ae434a7d69d0d14d2 Mon Sep 17 00:00:00 2001 From: chaopower Date: Wed, 27 Sep 2023 17:57:28 +0800 Subject: [PATCH] =?UTF-8?q?=E5=A4=84=E7=90=86=E8=BF=87=E6=BB=A4?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- script/filter_snpindel_common.pl | 206 +++++++++++ script/filter_snpindel_umi_1r_plus_2r.pl | 45 +++ script/filter_snpindel_umi_correct_f1r1.py | 115 ++++++ script/filter_snpindel_umi_subnormal.pl | 46 +++ script/func_fetch_bam.py | 29 ++ script/public/blacklist.txt | 21 ++ script/public/info.txt | 6 + script/run_pipeline.py | 33 ++ script/run_wdl.py | 70 ++++ wdl/call_mutation.wdl | 200 ++++++----- wdl/task.wdl | 397 +++++++++------------ 11 files changed, 856 insertions(+), 312 deletions(-) create mode 100755 script/filter_snpindel_common.pl create mode 100755 script/filter_snpindel_umi_1r_plus_2r.pl create mode 100755 script/filter_snpindel_umi_correct_f1r1.py create mode 100755 script/filter_snpindel_umi_subnormal.pl create mode 100755 script/func_fetch_bam.py create mode 100644 script/public/blacklist.txt create mode 100644 script/public/info.txt create mode 100755 script/run_pipeline.py create mode 100755 script/run_wdl.py diff --git a/script/filter_snpindel_common.pl b/script/filter_snpindel_common.pl new file mode 100755 index 0000000..5f53ad1 --- /dev/null +++ b/script/filter_snpindel_common.pl @@ -0,0 +1,206 @@ +#!/usr/bin/env perl +use strict; +#use warnings; +use List::Util qw(sum); + +die "useage:perl $0 input project sample_type somtic_out germline_out tag_out " unless @ARGV == 6; +my ($input, $project, $sample_type, $somtic_out, $germline_out, $tag_out) = @ARGV; + +# die "useage:perl $0 output_dir tumor project sample_type" unless @ARGV == 4; +# my ($output_dir, $name, $project, $sample_type) = @ARGV; + +# open IN, "$output_dir/mutation/${name}.snp.indel.anno.hg19_multianno.txt"; +open IN, "$input"; +my $head = ; +# if ($sample_type eq 'c') { +# open OUT, ">${name}.snp.indel.Somatic.annoall.hg19_multianno_filtered_pre.txt"; +# } +# elsif ($sample_type eq 't') { +# open OUT, ">${name}.snp.indel.Somatic.annoall.hg19_multianno_filtered.txt"; +# } + +open OUT, "> $somtic_out"; +print OUT "可信\t$head"; + +# open OUT2, ">${name}.snp.indel.Germline.anno.hg19_multianno_filtered.txt"; +open OUT2, ">$germline_out"; +print OUT2 "临床意义\t$head"; + +# open OUT3, ">${name}.snp.indel.anno.hg19_multianno_tag.txt"; +open OUT3, ">$tag_out"; +print OUT3 "TAG\t$head"; + +##black list +my $public_path = defined $ENV{'PUBLIC'} ? $ENV{'PUBLIC'} : "/dataseq/jmdna/codes/public/"; +open BKLT, "$public_path/blacklist.txt"; +my %bk; +; +while () { + chomp; + my @line = split("\t"); + my $key = join("_", @line[0 .. 4]); + $bk{$key} = 1; +} +sub blacklist { + my $pos = shift @_; + if (exists $bk{$pos}) { + return "1"; + } + else { + return ""; + } +} + +open INFO, "$public_path/info.txt"; +my @muts; +while () { + chomp; + my @line = split(/\t/, $_); + if ($line[0] eq $project) { + if ($line[2] ne "NA") { + @muts = split(/\//, $line[2]); + } + } +} +while () { + chomp; + my @line = split(/\t/, $_); + my $freq = (split(":", $line[-1]))[4]; + # next if $line[9] eq '.'; + if ($line[8] ne "synonymous SNV" and $line[8] ne "unknown"){ + if ( $line[17] < 0.01 and $line[18] < 0.01 and $line[19] < 0.01 and $line[20] < 0.01 and $line[23] < 0.01 and $line[28] < 0.01 and $line[32] < 0.01){ + if ($line[16] =~ /benign/i and $line[16] !~ /pathogenic|Affects|association|Conflicting|sensitivity|drug|other|risk|protective|Uncertain|not_provided|\./i) { + print OUT3 "benign\t", join("\t", @line), "\n"; + next; + } + if ($sample_type eq 'c') { + if ($line[11] =~ /OCCURENCE=(\S+)/) { + my $cosmic = $1; + $cosmic =~ s/\(\S+?\)//g; + my @cosmic = split(",", $cosmic); + $cosmic = sum @cosmic; + if ($freq < 0.01 and $cosmic <= 1) { + print OUT3 "cfdna_lowfreq_cosmic\t", join("\t", @line), "\n"; + next; + } + } + if ($freq < 0.01 and $line[11] eq '.') { + print OUT3 "cfdna_lowfreq_cosmic\t", join("\t", @line), "\n"; + next; + } + } + #blacklist + my $key = join("_", @line[0 .. 4]); + if (&blacklist($key)) { + print OUT3 "blacklist\t", join("\t", @line), "\n"; + next; + }; + if ($line[9] ne '.') { + my @hgvs = split(/,/, $line[9]); + my $hgvs = $hgvs[0]; + $hgvs =~ /(\S+):(\S+):exon(\d+):c\.(\S+):p\.(\S+)$/; + my $gene = $1; + if (!(@muts and grep {$gene eq $_} @muts)) { + print OUT3 "nontarget_gene\t", join("\t", @line), "\n"; + next; + } + + if ($line[101] ne 'PASS') { + my $filter = split(";", $line[101]); + if ($freq < 0.02 or ($freq >= 0.02 and $freq < 0.05 and $filter >= 2)) { + print OUT3 "byfilter\t", join("\t", @line), "\n"; + next; + }; + } + if (my $transcript = &transcript($gene)) { + if (grep {/$transcript/} @hgvs) { + $hgvs = (grep {/$transcript/} @hgvs)[0]; + } + } + $line[9] = $hgvs; + print OUT "1\t", join("\t", (@line[0 .. 4], "exonic", $gene, @line[7 .. $#line])), "\n"; + print OUT3 "PASS\t", join("\t", @line), "\n"; + if ($freq > 0.1) { + if ($line[16] =~ /Likely_pathogenic|drug/i) { + print OUT2 "2\t", join("\t", (@line[0 .. 4], "exonic", $gene, @line[7 .. $#line])), "\n"; + } + elsif ($line[16] =~ /pathogenic/i and $line[16] !~ /Conflicting/i) { + print OUT2 "1\t", join("\t", (@line[0 .. 4], "exonic", $gene, @line[7 .. $#line])), "\n"; + } + else { + print OUT2 "3\t", join("\t", (@line[0 .. 4], "exonic", $gene, @line[7 .. $#line])), "\n"; + } + } + } + elsif ($line[5] =~ /splicing/) { + next if $line[101] ne 'PASS'; + my $gene = (split(";", $line[6]))[0]; + if (!(@muts and grep {$gene eq $_} @muts)) { + print OUT3 "nontarget_gene\t", join("\t", @line), "\n"; + next; + }; + my @hgvs = split(/;/, $line[7]); + my $hgvs = $hgvs[0]; + if (my $transcript = &transcript($gene)) { + if (grep {/$transcript/} @hgvs) { + $hgvs = (grep {/$transcript/} @hgvs)[0]; + } + } + $hgvs =~ /(\S+):exon(\d+):c\.(\S+)$/; + my $spl = $3; + + if ($spl =~ /\d+[\+|\-][1|2]\D+/) { + $line[7] = join(":", ($gene, $hgvs)); + print OUT3 "PASS\t", join("\t", @line), "\n"; + print OUT "1\t", join("\t", (@line[0 .. 4], "splicing", $gene, '.', '.', @line[7, 10 .. $#line])), "\n"; + if ($freq > 0.1) { + if ($line[16] =~ /Likely_pathogenic|drug/i) { + print OUT2 "2\t", join("\t", (@line[0 .. 4], "splicing", $gene, '.', '.', @line[7, 10 .. $#line])), "\n"; + } + elsif ($line[16] =~ /pathogenic/i and $line[16] !~ /Conflicting/i) { + print OUT2 "1\t", join("\t", (@line[0 .. 4], "splicing", $gene, '.', '.', @line[7, 10 .. $#line])), "\n"; + } + else { + print OUT2 "3\t", join("\t", (@line[0 .. 4], "splicing", $gene, '.', '.', @line[7, 10 .. $#line])), "\n"; + } + } + } + else { + print OUT3 "unknow1\t", join("\t", @line), "\n"; + } + + } + else { + print OUT3 "unknow2\t", join("\t", @line), "\n"; + } + } + else{ + print OUT3 "common_snp\t", join("\t", @line), "\n"; + } + } + else { + print OUT3 "synonymous\t", join("\t", @line), "\n"; + } +} + + + +sub transcript { + my $gene = shift @_; + my $data_path = defined $ENV{'DATABASE'} ? $ENV{'DATABASE'} : "/dataseq/jmdna/codes/reportbase/"; + open TR, "$data_path/oncokbgene.txt"; + my %oncogene; + while () { + chomp; + my @line = split; + $oncogene{$line[0]} = $line[2]; + } + if (exists $oncogene{$gene}) { + $oncogene{$gene} =~ s/\.\d+//; + return $oncogene{$gene}; + } + else { + print "$gene has no NM id in oncokbgene.txt"; + return ""; + } +} diff --git a/script/filter_snpindel_umi_1r_plus_2r.pl b/script/filter_snpindel_umi_1r_plus_2r.pl new file mode 100755 index 0000000..c6c2d25 --- /dev/null +++ b/script/filter_snpindel_umi_1r_plus_2r.pl @@ -0,0 +1,45 @@ +#!/usr/bin/env perl +use strict; +use warnings; + +die "usage:perl $0 vcf1r vcf2r outvcf" unless @ARGV == 3; +my ($vcf1r, $vcf2r, $outvcf) = @ARGV; + +# open R1, "${outputdir}/mutation/${tumor}.1r.snp.indel.vcf"; +# open R2, "${outputdir}/mutation/${tumor}.2r.snp.indel.vcf"; +# open OUT, ">${outputdir}/mutation/${tumor}.snp.indel.vcf"; + +open R1, "$vcf1r"; +open R2, "$vcf2r"; +open OUT, ">$outvcf"; + +my (%r1, %r2); +while () { + next if /^#/; + chomp; + my @line = split; + my $key = join("_", @line[0, 1, 3, 4]); + $r2{$key} = $_; +} + +while () { + if (/^#/) { + print OUT; + next; + } + chomp; + my @line = split; + my $freq = (split(":", $line[-1]))[4]; + my $dp = (split(":", $line[-1]))[1]; + next if $dp < 50; + + if ($freq < 0.01) { + my $key = join("_", @line[0, 1, 3, 4]); + if (exists $r2{$key}) { + print OUT "$_\n"; + } + } + else { + print OUT "$_\n"; + } +} diff --git a/script/filter_snpindel_umi_correct_f1r1.py b/script/filter_snpindel_umi_correct_f1r1.py new file mode 100755 index 0000000..d2dedc8 --- /dev/null +++ b/script/filter_snpindel_umi_correct_f1r1.py @@ -0,0 +1,115 @@ +#!/usr/bin/python3 +# -*- coding: UTF-8 -*- +import sys + +import pandas as pd +import pysam + +if len(sys.argv) != 4: + print(" ".join(['usage:python', sys.argv[0], 'filter_file', 'bam_file', 'output'])) + sys.exit() + +# output_dir = sys.argv[1] +# tumor = sys.argv[2] + +# infile = "".join([output_dir, '/mutation/', tumor, '.snp.indel.Somatic.annoall.hg19_multianno_filtered_pre.txt']) +# bamfile = "".join([output_dir, '/alignment/', tumor, '.rmdup.bam']) +# outfile = "".join([output_dir, '/mutation/', tumor, '.snp.indel.Somatic.annoall.hg19_multianno_filtered.txt']) + +infile = sys.argv[1] +bamfile = sys.argv[2] +outfile = sys.argv[3] + +samfile = pysam.AlignmentFile(bamfile) + +OUT = open(outfile, 'w') + + +def correct(chr, start, end, alt_base): + total_reads = [] + alt_reads = [] + for pileupcolumn in samfile.pileup(chr, start, end, stepper="samtools", min_base_quality=0, min_mapping_quality=20, + max_depth=100000, ignore_overlaps=False, truncate=True, ignore_orphans=False): + for pileupread in pileupcolumn.pileups: + # print(str(pileupread)) + if not pileupread.is_del and not pileupread.is_refskip: + if pileupread.alignment.query_sequence[pileupread.query_position] == alt_base: + alt_reads.append(pileupread.alignment.query_name) + else: + if pileupread.alignment.get_tag('NM') < 4 and pileupread.alignment.query_qualities[ + pileupread.query_position] >= 20: + total_reads.append(pileupread.alignment.query_name) + alt_reads = list(set(alt_reads)) + non_alt_depth = len(list(set(total_reads))) + dic = {'chr': [], + 'pos': [], + 'read': [], + 'base': [], + 'quality': [], + 'NM': [], + 'FR_RR': [] + } + for read in alt_reads: + for pileupcolumn in samfile.pileup(chr, start, end, stepper="samtools", min_base_quality=0, + min_mapping_quality=20, max_depth=100000, ignore_overlaps=False, + truncate=True, ignore_orphans=False): + for pileupread in pileupcolumn.pileups: + if not pileupread.is_del and not pileupread.is_refskip and pileupread.alignment.query_name == read: + dic['chr'].append(pileupcolumn.reference_name) + dic['pos'].append(pileupcolumn.reference_pos + 1) + dic['read'].append(pileupread.alignment.query_name) + dic['base'].append(pileupread.alignment.query_sequence[pileupread.query_position]) + dic['quality'].append(pileupread.alignment.query_qualities[pileupread.query_position]) + dic['NM'].append(pileupread.alignment.get_tag('NM')) + fr = 0 + rr = 0 + if pileupread.alignment.has_tag('FR'): + fr = pileupread.alignment.get_tag('FR') + if pileupread.alignment.has_tag('RR'): + fr = pileupread.alignment.get_tag('RR') + dic['FR_RR'].append(fr + rr) + dic = pd.DataFrame(dic) + b = alt_reads[:] + for R in b: + if len(list(dic[dic['read'] == R]['base'])) == 2: + if list(dic[dic['read'] == R]['base'])[0] != list(dic[dic['read'] == R]['base'])[1]: + print(dic[dic['read'] == R]['base']) + print(dic[dic['read'] == R]['quality']) + alt_reads.remove(R) + else: + if (list(dic[dic['read'] == R]['quality'])[0] >= 20 and list(dic[dic['read'] == R]['NM'])[0] < 4) or ( + list(dic[dic['read'] == R]['quality'])[1] >= 20 and list(dic[dic['read'] == R]['NM'])[1] < 4): + pass + else: + alt_reads.remove(R) + else: + if list(dic[dic['read'] == R]['quality'])[0] < 20 or list(dic[dic['read'] == R]['NM'])[0] >= 4: + alt_reads.remove(R) + alt_reads_num = len(alt_reads) + total_depth = non_alt_depth + alt_reads_num + correct_num = 0 + for index, row in dic.iterrows(): + if row['read'] in alt_reads: + if row['quality'] >= 20 and row['NM'] < 4 and row['FR_RR'] > 1: + correct_num += 1 + # if alt_reads_num > 3 and correct_num > 0: + # return(alt_reads_num) + # else: + # return(0) + return (alt_reads_num, correct_num, total_depth) + + +snv = pd.read_table(infile, sep="\t") +cols = [index for index, row in snv[snv['可信'] == 0].iterrows()] +snv.drop(cols, inplace=True) +drop_index = [] +for index, row in snv.iterrows(): + if len(row['Ref']) == 1 and len(row['Alt']) == 1: + if float(row['Otherinfo13'].split(':')[4]) < 0.05: + c = correct(row['Chr'], row['Start'] - 1, row['End'], row['Alt']) + if float(c[0]) < 3 or float(c[1]) < 1 or float(c[0] / (c[0] + c[2])) < 0.002: + drop_index.append(index) + +snv.drop(labels=drop_index, inplace=True) +# OUT.write(snv) +snv.to_csv(outfile, index=False, sep="\t") diff --git a/script/filter_snpindel_umi_subnormal.pl b/script/filter_snpindel_umi_subnormal.pl new file mode 100755 index 0000000..faa89a4 --- /dev/null +++ b/script/filter_snpindel_umi_subnormal.pl @@ -0,0 +1,46 @@ +#!/usr/bin/env perl +use strict; +use warnings; + +die "usage:perl $0 tumor normal output" unless @ARGV == 3; +my ($tumor, $normal, $output) = @ARGV; +open T, "$tumor"; +open N, "$normal"; +open OUT, "> $output"; + +my %n; +while () { + next if /^#/; + chomp; + my @line = split; + my $freq = (split(":", $line[-1]))[4]; + my $key = join '_', @line[0, 1, 3, 4]; + $n{$key} = $freq; +} + +while () { + if (/^#/) { + print OUT; + next; + } + chomp; + my @line = split; + my $freq = (split(":", $line[-1]))[4]; + my $key = join '_', @line[0, 1, 3, 4]; + if (not exists $n{$key}) { + print OUT "$_\n"; + next; + } + else { + # 去除对照样本中频率大于0.05的 + if ($n{$key} >= 0.05) { + next; + } + else { + # 频率是对照样本的五倍? + if ($freq / $n{$key} > 5) { + print OUT "$_\n"; + } + } + } +} diff --git a/script/func_fetch_bam.py b/script/func_fetch_bam.py new file mode 100755 index 0000000..c701a8c --- /dev/null +++ b/script/func_fetch_bam.py @@ -0,0 +1,29 @@ +#!/usr/bin/env python3 +# -*- coding: UTF-8 -*- +import sys + +import pysam + +# 提取fr+rr>=2的点 +if len(sys.argv) != 3: + print(" ".join(['usage:python', sys.argv[0], 'outputDir', 'tumor'])) + sys.exit() + +# output_dir = sys.argv[1] +# tumor = sys.argv[2] + +# infile = "".join([output_dir, '/alignment/', tumor, '.rmdup.bam']) +# outfile = "".join([output_dir, '/alignment/', tumor, '.2r.rmdup.bam']) +samfile = pysam.AlignmentFile(sys.argv[1], "rb") +bamfile = pysam.AlignmentFile(sys.argv[2], "wb", template=samfile) +for s in samfile: + fr = 0 + rr = 0 + if s.has_tag('FR'): + fr = s.get_tag('FR') + if s.has_tag('RR'): + rr = s.get_tag('RR') + if fr + rr >= 2: + bamfile.write(s) + +# os.system('samtools index ' + outfile) diff --git a/script/public/blacklist.txt b/script/public/blacklist.txt new file mode 100644 index 0000000..35c007b --- /dev/null +++ b/script/public/blacklist.txt @@ -0,0 +1,21 @@ +Chr Start End Ref Alt Gene AAchange +chr6 29911227 29911228 GA TG HLA-A p.E176W +chr6 29912029 29912030 GG C HLA-A p.Q250Hfs*47 +chr6 29912383 29912383 G A HLA-A p.R334R +chr6 31237764 31237774 TCATAGCGGTG CCACAACAGCC HLA-C p.T329_M332delinsAVVV +chr6 31237994 31237994 C - HLA-C p.S297Afs*25 +chr6 31238865 31238865 T - HLA-C p.T202Rfs*12 +chr6 31238884 31238884 G T HLA-C p.Y195X +chr6 31238909 31238910 GT AG HLA-C p.T187L +chr6 31239431 31239431 C - HLA-C p.A97Lfs*4 +chr6 31239490 31239490 C - HLA-C p.E77Sfs*24 +chr6 31324003 31324004 TC GT HLA-B p.E187T +chr6 31324525 31324536 CCTGGGCCTTGT TGTTGGTCTTGG HLA-B p.Y91_A95delinsSKTNT +chr6 31324734 31324861 CCTGGGGGTGAGGAGGGGCTGAGACCCGCCCGACCCTCCTCCCGGCGCGGCTCCTCAGGTCCTGCGCCCCCGCCTGCGGTCCCCTCGCTCCTCCCGGCAGAGGCCATTTCCCTCCCGACCCGCACTCA - HLA-B p.G25Afs*5 +chr6 29910581 29910583 CGC AGT HLA-A p.R41S +chr6 31237862 31237862 T A HLA-C p.E299V +chr6 31324525 31324549 CCTGGGCCTTGTAGATCTGTGTGTT TCTGGCGCTTGTACTTCTGTGTCTC HLA-B p.N87_A95delinsETQKYKRQT +chr6 31238909 31238910 GT TC HLA-C p.T187E +chr3 10088407 10088410 AGTA - FANCD2 p.V427Ffs*20 +chr12 57112003 57112003 A G NACA p.M1104T +chr12 57112022 57112022 A G NACA p.S1098P diff --git a/script/public/info.txt b/script/public/info.txt new file mode 100644 index 0000000..e457bf7 --- /dev/null +++ b/script/public/info.txt @@ -0,0 +1,6 @@ +project probe mutation splicing promoter cnv fusion long_indel chemotherapy_drug non_target_mutation +160gene /dataseq/jmdna/database/bed/160.bed AKT1/ALK/APC/ATM/BARD1/BRAF/BRCA1/BRCA2/BRIP1/CCND1/CCND2/CCND3/CDK12/CDK4/CDK6/CDKN2A/CHEK1/CHEK2/CSF1R/CTNNB1/DDR2/EGFR/ERBB2/ERBB3/ERBB4/FANCL/FBXW7/FGFR1/FGFR2/FGFR3/FLT3/GNA11/GNAQ/HRAS/IDH1/IDH2/JAK1/JAK2/JAK3/KDR/KIT/KRAS/MAP2K1/MET/MTOR/NF1/NRAS/NTRK1/NTRK2/NTRK3/PALB2/PDGFRA/PDGFRB/PIK3CA/PTEN/RAD51B/RAD51C/RAD51D/RAD54L/RB1/RET/ROS1/SMAD4/SMO/STK11/TP53/TSC1/TSC2/VHL MET TERT CDK4/EGFR/ERBB2/FGFR1/FGFR2/FGFR3/FLT3/MET/MDM2/MDM4/CDKN2A ALK/NTRK1/NTRK2/NTRK3/RET/ROS1 BCL2L11 NA NA +650gene /dataseq/jmdna/database/bed/650.bed ABL1/AKT1/AKT2/AKT3/ALK/APC/ARAF/ATM/BARD1/BRAF/BRCA1/BRCA2/BRIP1/BTK/CCND1/CCND2/CCND3/CDK12/CDK4/CDK6/CDKN2A/CDKN2B/CHEK1/CHEK2/CSF1R/CTNNB1/DDR2/EGFR/ERBB2/ERBB3/ERBB4/ESR1/EZH2/FANCL/FBXW7/FGFR1/FGFR2/FGFR3/FGFR4/FLT3/GNA11/GNAQ/HRAS/IDH1/IDH2/JAK1/JAK2/JAK3/KDR/KIT/KRAS/MAP2K1/MAP2K2/MET/MPL/MTOR/MYCN/MYD88/NF1/NF2/NRAS/NTRK1/NTRK2/NTRK3/PALB2/PDGFRA/PDGFRB/PIK3CA/PTCH1/PTEN/RAD51B/RAD51C/RAD51D/RAD54L/RAF1/RB1/RET/ROS1/SMAD4/SMARCB1/SMO/STK11/TP53/TSC1/TSC2/VHL MET TERT CDK4/EGFR/ERBB2/FGFR1/FGFR2/FGFR3/FLT3/MET/MYCN/MDM2/MDM4/CDKN2A/CDKN2B ALK/BRAF/FGFR1/FGFR2/FGFR3/NTRK1/NTRK2/NTRK3/RET/ROS1 BCL2L11 NA NA +lung85gene /dataseq/jmdna/database/bed/160.bed AKT1/ALK/ATM/BARD1/BCL2L11/BRAF/BRCA1/BRCA2/BRIP1/CCND1/CCND2/CDK12/CDK4/CDKN2A/CHEK1/CHEK2/CSF1R/DDR2/EGFR/ERBB2/ERBB4/FANCL/FBXW7/FGFR1/FGFR2/FGFR3/HRAS/IDH1/KIT/KRAS/MET/MTOR/MYC/NF1/NRAS/NTRK1/NTRK2/NTRK3/PALB2/PDGFRA/PIK3CA/PTEN/RAD51B/RAD51C/RAD51D/RAD54L/RET/ROS1/TP53/TSC1/TSC2/VHL/STK11/APC/ARID1A/CTNNB1/EPCAM/ERBB3/MAP2K1/MLH1/MSH2/MSH6/PMS2/RB1/SMAD4/SMO MET NA ALK/CCND1/CCND2/CDK4/CDKN2A/CSF1R/EGFR/ERBB2/FGFR1/FGFR2/FGFR3/MET/MYC/SMO ALK/NTRK1/NTRK2/NTRK3/RET/ROS1 BCL2L11 ABCB1/CASP7/CDA/CYP2C8/CYP3A4/DPYD/DYNC2H1/ERCC1/ERCC2/GSTP1/MTHFR/NQO1/SOD2/TPMT/TYMS/UGT1A1/XRCC1/XPC/HAS3 +crc88gene /dataseq/jmdna/database/bed/160.bed AKT1/ALK/APC/ARID1A/ATM/BARD1/BRAF/BRCA1/BRCA2/BRIP1/CCND2/CCND3/CDK12/CDK4/CDKN2A/CHEK1/CHEK2/CSF1R/CTNNB1/EGFR/EPCAM/ERBB2/FANCL/FBXW7/FGFR1/FGFR2/FGFR3/FLT3/IDH1/KDR/KIT/KRAS/MAP2K1/MET/MLH1/MSH2/MSH6/MTOR/NF1/NRAS/NTRK1/NTRK2/NTRK3/PALB2/PDGFRA/PIK3CA/PMS2/PTEN/RAD51B/RAD51C/RAD51D/RAD54L/RB1/RET/ROS1/SMAD4/SMO/TP53/TSC1/TSC2/VHL/POLD1/POLE/STK11/MUTYH/SDHA/SDHB/SDHC/SDHD MET NA ALK/CCND2/CCND3/CDK4/CDKN2A/CSF1R/EGFR/ERBB2/FGFR1/FGFR2/FGFR3/FLT3/MET/PTEN/RB1 ALK/NTRK1/NTRK2/NTRK3/RET/ROS1 NA ABCB1/CASP7/CDA/CYP2C8/CYP3A4/DPYD/DYNC2H1/ERCC1/ERCC2/GSTP1/MTHFR/NQO1/SOD2/TPMT/TYMS/UGT1A1/XRCC1/XPC/HAS3 NA +17gene /dataseq/jmdna/database/bed/lung17gene.hg19.liftover.bed EGFR/BRAF/ERBB2/ALK/MET/AKT1/KRAS/MAP2K1/NRAS/PTEN/PIK3CA/NTRK1/NTRK2/NTRK3 MET NA ERBB2/MET/EGFR ALK/RET/ROS1/NTRK1/NTRK2/NTRK3 BCL2L11 NA lung diff --git a/script/run_pipeline.py b/script/run_pipeline.py new file mode 100755 index 0000000..484ae2f --- /dev/null +++ b/script/run_pipeline.py @@ -0,0 +1,33 @@ +import argparse +import os + +run_wdl_path = os.path.join(os.path.dirname(__file__), 'run_wdl.py') + +if __name__ == '__main__': + parser = argparse.ArgumentParser(description="JM to run pipeline") + + parser.add_argument('-n', '--barcode', help="sample's barcode", required=True) + parser.add_argument('-s', '--normal', help="sample's normal", default='', required=False, nargs='?') + parser.add_argument('-u', '--umi', action='store_true', help="is umi sample", default=False) + parser.add_argument('-i', '--input_dir', help="sample's input_dir/workdir", required=True) + parser.add_argument('-o', '--output_dir', help="Output directory, default ./", default='./') + parser.add_argument('-p', '--project', help="project", required=True) + parser.add_argument('-b', '--bed', help="bed", required=True) + parser.add_argument('-w', '--wdl', help="wdl") + + args = parser.parse_args() + + res_path = os.path.realpath(os.path.join(args.output_dir, args.barcode)) + + if not os.path.exists(res_path): + os.mkdir(res_path) + + cmd = f'nohup python ' \ + f'{run_wdl_path} -n {args.barcode} -s {args.normal} ' \ + f'{"-u " if args.umi else ""} -i {args.input_dir} ' \ + f'-o {res_path} -b {args.bed} -p {args.project} -w {args.wdl} ' \ + f'> {res_path}/{args.barcode}_run.log ' \ + f'2>> {res_path}/{args.barcode}_run.log &' + with open(os.path.join(res_path, 'exec'), 'w') as execfile: + execfile.write(cmd + '\n') + os.system(cmd) diff --git a/script/run_wdl.py b/script/run_wdl.py new file mode 100755 index 0000000..65fc49c --- /dev/null +++ b/script/run_wdl.py @@ -0,0 +1,70 @@ +import argparse +import json +import os +import subprocess +import time + + +def run(barcode, normal, umi, input_dir, output_dir, project, bed, wdl): + input_dir = os.path.realpath(input_dir) + output_dir = os.path.realpath(output_dir) + wdl = os.path.realpath(wdl) + arg = { + "pipeline.tumor": barcode, + "pipeline.normal": normal, + "pipeline.umi": umi, + "pipeline.input_dir": input_dir, + "pipeline.output_dir": output_dir, + "pipeline.project": project, + "pipeline.bed": bed + } + + arg = {key: value for key, value in arg.items() if value not in (None, '', False)} + + # generate json + jsfile_path = os.path.join(output_dir, f'{barcode}.json') + with open(jsfile_path, 'w') as jsfile: + jsfile.write(json.dumps(arg, indent=4, ensure_ascii=False)) + + # run pipeline + cmd1 = 'export PATH=/home/zhangchao/project/pipeline/workflow/script:$PATH' + cmd2 = 'export PUBLIC=/home/zhangchao/project/pipeline/workflow/script/public/' + cmd3 = f'cd {output_dir}' + cmd4 = f'/home/zhangchao/soft/jdk-17.0.7+7/bin/java -jar /home/zhangchao/soft/cromwell-85.jar run --inputs {jsfile_path} {wdl}' + cmd = f'{cmd1}; {cmd2}; {cmd3}; {cmd4}' + + # 记录开始时间 + start_time = time.time() + print(cmd) + ret = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, encoding="utf-8") + pidnum = ret.pid + with open(os.path.join(output_dir, 'pid'), 'w') as pidfile: + pidfile.write(str(pidnum)) + ret.wait() + # 记录结束时间 + end_time = time.time() + # 计算运行时间 + elapsed_time = end_time - start_time + + print("\n运行时间:{:.2f} 秒".format(elapsed_time)) + + print(ret.stdout.read(), ret.stderr.read()) + print('#' * 50) + print('读取日志') + + +if __name__ == '__main__': + parser = argparse.ArgumentParser(description="JM to run pipeline") + + parser.add_argument('-n', '--barcode', help="sample's barcode", required=True) + parser.add_argument('-s', '--normal', help="sample's normal", default='', required=False, nargs='?') + parser.add_argument('-u', '--umi', action='store_true', help="is umi sample", default=False) + parser.add_argument('-i', '--input_dir', help="sample's input_dir/workdir", required=True) + parser.add_argument('-o', '--output_dir', help="Output directory, default ./", default='./') + parser.add_argument('-p', '--project', help="project", required=True) + parser.add_argument('-b', '--bed', help="bed", required=True) + parser.add_argument('-w', '--wdl', help="wdl") + + args = parser.parse_args() + + run(args.barcode, args.normal, args.umi, args.input_dir, args.output_dir, args.project, args.bed, args.wdl) diff --git a/wdl/call_mutation.wdl b/wdl/call_mutation.wdl index 41efa3f..7897140 100644 --- a/wdl/call_mutation.wdl +++ b/wdl/call_mutation.wdl @@ -10,7 +10,6 @@ task mutation_calling_umi { mkdir ${output_dir}/mutation fi - #1条call # 这个情况是reads数目只有1,但是如果去掉了这个reads数导致数据量减少很多 # -r 3 是指有3条这样样的reads支撑 @@ -27,7 +26,7 @@ task mutation_calling_umi { -N ${name} -E -f 0.001 > ${output_dir}/mutation/${name}.1r.snp.indel.vcf #提取>=2条矫正的序列 - python3 /home/zhangchao/project/pipeline/control/script/fetch_bam.py ${output_dir}/alignment/${name}.rmdup.bam ${output_dir}/alignment/${name}.2r.rmdup.bam + func_fetch_bam.py ${output_dir}/alignment/${name}.rmdup.bam ${output_dir}/alignment/${name}.2r.rmdup.bam samtools index ${output_dir}/alignment/${name}.2r.rmdup.bam # 保证 1r call mut umi family 里面有2条reads @@ -38,25 +37,15 @@ task mutation_calling_umi { | /dataseq/jmdna/software/VarDict-1.8.3/bin/var2vcf_valid.pl -N ${name} -E -f 0.001 >${output_dir}/mutation/${name}.2r.snp.indel.vcf #merge突变,以1条方式call的>0.01的突变+两条方式的对一条方式的低频区域(AF<0.01)进行矫正。 - perl /home/zhangchao/project/pipeline/control/script/1r_plus_2r.pl \ + filter_snpindel_umi_1r_plus_2r.pl \ ${output_dir}/mutation/${name}.1r.snp.indel.vcf \ ${output_dir}/mutation/${name}.2r.snp.indel.vcf \ ${output_dir}/mutation/${name}.snp.indel.vcf - table_annovar.pl \ - ${output_dir}/mutation/${name}.snp.indel.vcf \ - /dataseq/jmdna/software/annovar/humandb/ \ - -buildver hg19 -nastring . -vcfinput -remove -otherinfo \ - -protocol refGene,avsnp150,cosmic91,clinvar_20220320,1000g2015aug_all,1000g2015aug_eas,esp6500siv2_all,exac03nontcga,gnomad_genome,dbnsfp35c,cytoBand \ - -argument '-splicing_threshold 2 -hgvs',,,,,,,,,, \ - --intronhgvs 50 \ - -operation g,f,f,f,f,f,f,f,f,f,r \ - --outfile ${output_dir}/mutation/${name}.snp.indel.anno - >>> output { - String vcf = "${output_dir}/mutation/${name}.filter.flag.snp.indel.vcf" + String vcf = "${output_dir}/mutation/${name}.snp.indel.vcf" } } @@ -86,16 +75,6 @@ task mutation_calling_tissue { -c 1 -S 2 -E 3 -g 4 ${bed} |/dataseq/jmdna/software/VarDict-1.8.3/bin/teststrandbias.R \ |/dataseq/jmdna/software/VarDict-1.8.3/bin/var2vcf_valid.pl -N ${name} -E -f 0.01 >${output_dir}/mutation/${name}.snp.indel.vcf - table_annovar.pl \ - ${output_dir}/mutation/${name}.snp.indel.vcf \ - /dataseq/jmdna/software/annovar/humandb/ \ - -buildver hg19 -nastring . -vcfinput -remove -otherinfo \ - -protocol refGene,avsnp150,cosmic91,clinvar_20220320,1000g2015aug_all,1000g2015aug_eas,esp6500siv2_all,exac03nontcga,gnomad_genome,dbnsfp35c \ - -argument '-splicing_threshold 2 -hgvs',,,,,,,,, \ - -operation g,f,f,f,f,f,f,f,f,f \ - --intronhgvs 50 \ - --outfile ${output_dir}/mutation/${name}.snp.indel.anno - >>> output { @@ -124,15 +103,73 @@ task mutation_calling_tissue_control { -UN -Q 20 -m 3 -r 3 -th 20 -c 1 -S 2 -E 3 -g 4 ${bed} | /dataseq/jmdna/software/VarDict-1.8.3/bin/testsomatic.R \ | /dataseq/jmdna/software/VarDict-1.8.3/bin/var2vcf_paired.pl -N ${name} -f 0.01 > ${output_dir}/mutation/${name}.snp.indel.vcf - table_annovar.pl \ - ${output_dir}/mutation/${name}.snp.indel.vcf \ - /dataseq/jmdna/software/annovar/humandb/ \ - -buildver hg19 -nastring . -vcfinput -remove -otherinfo \ - -protocol refGene,avsnp150,cosmic91,clinvar_20220320,1000g2015aug_all,1000g2015aug_eas,esp6500siv2_all,exac03nontcga,gnomad_genome,dbnsfp35c \ - -argument '-splicing_threshold 2 -hgvs',,,,,,,,, \ - -operation g,f,f,f,f,f,f,f,f,f \ - --intronhgvs 50 \ - --outfile ${output_dir}/mutation/${name}.snp.indel.anno + >>> + + output { + String vcf = "${output_dir}/mutation/${name}.snp.indel.vcf" + } +} + +task mutation_calling_umi_control { + String name + String bed + String ref + String output_dir + String tumor_rmdup_bam + String normal_rmdup_bam + + command <<< + if [ ! -d ${output_dir}/mutation ];then + mkdir ${output_dir}/mutation + fi + + # 对照样本 + java -jar /dataseq/jmdna/software/VarDict-1.8.3/lib/VarDict-1.8.3.jar \ + -G ${ref} \ + -f 0.01 \ + -N ${name} \ + -b ${normal_rmdup_bam} \ + -UN \ + -Q 20 \ + -m 3 \ + -r 3 \ + -th 10 \ + -c 1 -S 2 -E 3 -g 4 ${bed} |/dataseq/jmdna/software/VarDict-1.8.3/bin/teststrandbias.R \ + |/dataseq/jmdna/software/VarDict-1.8.3/bin/var2vcf_valid.pl -N ${name} -E -f 0.01 >${output_dir}/mutation/${name}_normal.snp.indel.vcf + + # 实验样本 + java -jar /dataseq/jmdna/software/VarDict-1.8.3/lib/VarDict-1.8.3.jar \ + -G ${ref} \ + -f 0.001 \ + -N ${name} \ + -b ${tumor_rmdup_bam} \ + -UN -Q 20 -m 3 -r 3 -th 10 -c 1 -S 2 -E 3 -g 4 ${bed} \ + | /dataseq/jmdna/software/VarDict-1.8.3/bin/teststrandbias.R \ + | /dataseq/jmdna/software/VarDict-1.8.3/bin/var2vcf_valid.pl \ + -N ${name} -E -f 0.001 > ${output_dir}/mutation/${name}.1r.snp.indel.vcf + + #提取>=2条矫正的序列 + func_fetch_bam.py ${output_dir}/alignment/${name}.rmdup.bam ${output_dir}/alignment/${name}.2r.rmdup.bam + samtools index ${output_dir}/alignment/${name}.2r.rmdup.bam + + # 保证 1r call mut umi family 里面有2条reads + #2条矫正的call + java -jar /dataseq/jmdna/software/VarDict-1.8.3/lib/VarDict-1.8.3.jar -G ${ref} \ + -f 0.0001 -N ${name}_2r -b ${output_dir}/alignment/${name}.2r.rmdup.bam \ + -UN -Q 20 -m 3 -r 1 -th 10 -c 1 -S 2 -E 3 -g 4 ${bed} | /dataseq/jmdna/software/VarDict-1.8.3/bin/teststrandbias.R \ + | /dataseq/jmdna/software/VarDict-1.8.3/bin/var2vcf_valid.pl -N ${name} -E -f 0.001 >${output_dir}/mutation/${name}.2r.snp.indel.vcf + + #merge突变,以1条方式call的>0.01的突变+两条方式的对一条方式的低频区域(AF<0.01)进行矫正。 + filter_snpindel_umi_1r_plus_2r.pl \ + ${output_dir}/mutation/${name}.1r.snp.indel.vcf \ + ${output_dir}/mutation/${name}.2r.snp.indel.vcf \ + ${output_dir}/mutation/${name}.snp.indel.vcf + + # 去除normal 中的突变位点 + filter_snpindel_umi_subnormal.pl \ + ${output_dir}/mutation/${name}_tumor.snp.indel.vcf \ + ${output_dir}/mutation/${name}_normal.snp.indel.vcf \ + ${output_dir}/mutation/${name}.snp.indel.vcf >>> @@ -152,64 +189,59 @@ workflow call_mutation { String ref String bed - scatter(name in [tumor, normal]) { - if (defined(name)) { - if (name==tumor) { - if (umi) { - call mutation_calling_umi as tumor_mutation_calling_umi { - input: - name=name, - output_dir=output_dir, - ref=ref, - bed=bed, - rmdup_bam=tumor_rmdup_bam - } - } - if (!umi) { - # 单样本模式,normal没有定义 - if (name==select_first([normal, tumor])) { - call mutation_calling_tissue as tumor_mutation_calling_tissue { - input: - name=name, - output_dir=output_dir, - ref=ref, - bed=bed, - rmdup_bam=normal_rmdup_bam - } - } - # 双样本模式,normal有定义 - if (name!=select_first([normal, tumor])) { - call mutation_calling_tissue_control as tumor_mutation_calling_tissue_control { - input: - name=name, - output_dir=output_dir, - ref=ref, - bed=bed, - tumor_rmdup_bam=tumor_rmdup_bam, - normal_rmdup_bam=normal_rmdup_bam - } - } - } - } - if (name==select_first([normal, 'None'])) { - if (umi) { - call mutation_calling_tissue as normal_mutation_calling_tissue { - input: - name=name, - output_dir=output_dir, - ref=ref, - bed=bed, - rmdup_bam=normal_rmdup_bam - } - } + # 双样本 + if (defined(normal)) { + if (umi) { + call mutation_calling_umi_control { + input: + name=tumor, + output_dir=output_dir, + ref=ref, + bed=bed, + tumor_rmdup_bam=tumor_rmdup_bam, + normal_rmdup_bam=normal_rmdup_bam } + } + if (!umi) { + call mutation_calling_tissue_control { + input: + name=tumor, + output_dir=output_dir, + ref=ref, + bed=bed, + tumor_rmdup_bam=tumor_rmdup_bam, + normal_rmdup_bam=normal_rmdup_bam + } + } + } + + # 单样本 + if (!defined(normal)) { + if (umi) { + call mutation_calling_umi { + input: + name=tumor, + output_dir=output_dir, + ref=ref, + bed=bed, + rmdup_bam=tumor_rmdup_bam + } + } + if (!umi) { + call mutation_calling_tissue { + input: + name=tumor, + output_dir=output_dir, + ref=ref, + bed=bed, + rmdup_bam=normal_rmdup_bam + } } } output { String somatic_vcf = "${output_dir}/mutation/${tumor}.snp.indel.vcf" - String somatic_nc_vcf = "${output_dir}/mutation/${normal}.snp.indel.vcf" } } \ No newline at end of file diff --git a/wdl/task.wdl b/wdl/task.wdl index 50c742d..2d22d4d 100644 --- a/wdl/task.wdl +++ b/wdl/task.wdl @@ -6,130 +6,92 @@ task create_dir { if [ ! -d ${workdir} ];then mkdir -p ${workdir}/log fi - >>> } -task mutation_calling { - String name - String tumor_rmdupBam - String normal_rmdupBam - String outputDir - String bed - - command <<< - - if [ ! -d ${outputDir}/mutation ];then - mkdir ${outputDir}/mutation - fi - - java -jar /dataseq/jmdna/software/VarDict-1.8.3/lib/VarDict-1.8.3.jar \ - -G /dataseq/jmdna/database/genome/hg19/hg19.fa \ - -f 0.01 \ - -N ${name} \ - -b "${tumor_rmdupBam}|${normal_rmdupBam}" \ - -UN -Q 20 -m 3 -r 3 -th 20 -c 1 -S 2 -E 3 -g 4 ${bed} | \ - /dataseq/jmdna/software/VarDict-1.8.3/bin/testsomatic.R | \ - /dataseq/jmdna/software/VarDict-1.8.3/bin/var2vcf_paired.pl -N ${name} -f 0.01 \ - > ${outputDir}/mutation/${name}_vardict.snp.indel.vcf - - vep \ - --input_file ${outputDir}/mutation/${name}_vardict.snp.indel.vcf \ - --output_file ${outputDir}/mutation/${name}_vardict_vep.snp.indel.vcf \ - --format vcf \ - --vcf \ - --symbol \ - --terms SO \ - --hgvs \--fasta /dataseq/jmdna/database/genome/hg19/hg19.fa \ - --offline --cache --dir_cache /home/software/.vep \ - --pick \ - --force_overwrite - - >>> - - output { - String somatic_hc_vcf = "${outputDir}/mutation/${name}.snp.indel.Somatic.hc.vcf" - String germline_vcf="${outputDir}/mutation/${name}.snp.indel.Germline.vcf" - String loh_hc_vcf="${outputDir}/mutation/${name}.snp.indel.LOH.hc.vcf" - } - -} - task annovar { - String name - String outputDir + String prefix + String output_dir String ref - String somatic_hc_vcf - String germline_vcf - String loh_hc_vcf - String rmdupBam + String vcf command <<< - if [ ! -d ${outputDir}/mutation ];then - mkdir ${outputDir}/mutation + if [ ! -d ${output_dir}/mutation ];then + mkdir ${output_dir}/mutation fi table_annovar.pl \ - ${somatic_hc_vcf} \ + ${vcf} \ /dataseq/jmdna/software/annovar/humandb/ \ -buildver hg19 -nastring . -vcfinput -remove -otherinfo \ -protocol refGene,avsnp150,cosmic91,clinvar_20220320,1000g2015aug_all,1000g2015aug_eas,esp6500siv2_all,exac03nontcga,gnomad_genome,dbnsfp35c,cytoBand \ -argument '-splicing_threshold 2 -hgvs',,,,,,,,,, \ --intronhgvs 50 \ -operation g,f,f,f,f,f,f,f,f,f,r \ - --outfile ${outputDir}/mutation/${name}.snp.indel.Somatic.anno - - table_annovar.pl \ - ${germline_vcf} \ - /dataseq/jmdna/software/annovar/humandb/ \ - -buildver hg19 -nastring . -vcfinput -remove -otherinfo \ - -protocol refGene,avsnp150,cosmic91,clinvar_20220320,1000g2015aug_all,1000g2015aug_eas,esp6500siv2_all,exac03nontcga,gnomad_genome,dbnsfp35c,cytoBand \ - -argument '-splicing_threshold 2 -hgvs',,,,,,,,,, \ - --intronhgvs 50 \ - -operation g,f,f,f,f,f,f,f,f,f,r \ - --outfile ${outputDir}/mutation/${name}.snp.indel.Germline.anno - - table_annovar.pl \ - ${loh_hc_vcf} \ - /dataseq/jmdna/software/annovar/humandb/ \ - -buildver hg19 -nastring . -vcfinput -remove -otherinfo \ - -protocol refGene,avsnp150,cosmic91,clinvar_20220320,1000g2015aug_all,1000g2015aug_eas,esp6500siv2_all,exac03nontcga,gnomad_genome,dbnsfp35c,cytoBand \ - -argument '-splicing_threshold 2 -hgvs',,,,,,,,,, \ - --intronhgvs 50 \ - -operation g,f,f,f,f,f,f,f,f,f,r \ - --outfile ${outputDir}/mutation/${name}.snp.indel.LOH.anno - - java -jar /dataseq/jmdna/software/GenomeAnalysisTK.3.7.jar -T VariantAnnotator \ - -R ${ref} \ - -I ${rmdupBam} \ - -V ${somatic_hc_vcf} \ - -o ${outputDir}/mutation/${name}.TandemRepeatAnnotator.vcf \ - --annotation TandemRepeatAnnotator - - grep -v "^##" ${outputDir}/mutation/${name}.TandemRepeatAnnotator.vcf \ - |cut -f8| paste ${outputDir}/mutation/${name}.snp.indel.Somatic.anno.hg19_multianno.txt - \ - > ${outputDir}/mutation/${name}.snp.indel.Somatic.annoall.hg19_multianno.txt + --outfile ${output_dir}/mutation/${prefix} >>> output { - String somatic_anno = "${outputDir}/mutation/${name}.snp.indel.Somatic.anno.hg19_multianno.txt" - String germline_anno = "${outputDir}/mutation/${name}.snp.indel.Germline.anno.hg19_multianno.txt" - String somatic_all_anno = "${outputDir}/mutation/${name}.snp.indel.Somatic.annoall.hg19_multianno.txt" + String anno = "${output_dir}/mutation/${prefix}.hg19_multianno.txt" + } +} + +task dealwithsnvindel { + String name + String anno + String project + String output_dir + String umi + String tumor_rmdup_bam + + command <<< + + if [ ! -d ${output_dir}/mutation ];then + mkdir ${output_dir}/mutation + fi + + if ${umi} ;then + filter_snpindel_common.pl \ + ${anno} \ + ${project} \ + c \ + ${output_dir}/mutation/${name}.snp.indel.Somatic.anno.hg19_multianno_filtered_pre.txt \ + ${output_dir}/mutation/${name}.snp.indel.Germline.anno.hg19_multianno_filtered.txt \ + ${output_dir}/mutation/${name}.snp.indel.anno.hg19_multianno_tag.txt + + filter_snpindel_umi_correct_f1r1.py \ + ${output_dir}/mutation/${name}.snp.indel.Somatic.anno.hg19_multianno_filtered_pre.txt \ + ${tumor_rmdup_bam} \ + ${output_dir}/mutation/${name}.snp.indel.Somatic.anno.hg19_multianno_filtered.txt + + else + filter_snpindel_common.pl \ + ${anno} \ + ${project} \ + t \ + ${output_dir}/mutation/${name}.snp.indel.Somatic.anno.hg19_multianno_filtered.txt \ + ${output_dir}/mutation/${name}.snp.indel.Germline.anno.hg19_multianno_filtered.txt \ + ${output_dir}/mutation/${name}.snp.indel.anno.hg19_multianno_tag.txt + + >>> + output { + String snvindel_filtered= "${output_dir}/mutation/${name}.snp.indel.Somatic.anno.hg19_multianno_filtered.txt" + String germline_filtered = "${output_dir}/mutation/${name}.snp.indel.Germline.anno.hg19_multianno_filtered.txt" } } task tmb { String codesDir String name - String outputDir + String output_dir String somatic_anno command <<< - perl ${codesDir}/tmb.pl ${outputDir} ${name} + perl ${codesDir}/tmb.pl ${output_dir} ${name} >>> output { - String tmb="${outputDir}/mutation/${name}.tmb.txt" + String tmb="${output_dir}/mutation/${name}.tmb.txt" } } @@ -137,7 +99,7 @@ task fusion { String name String ref String codesDir - String outputDir + String output_dir String rmdupBam String cancer String project @@ -145,48 +107,48 @@ task fusion { command <<< - if [ ! -d ${outputDir}/fusion ];then - mkdir ${outputDir}/fusion + if [ ! -d ${output_dir}/fusion ];then + mkdir ${output_dir}/fusion fi # Extract the discordant paired-end alignments. - samtools view -b -F 1294 ${rmdupBam} > ${outputDir}/fusion/${name}.discordants.bam + samtools view -b -F 1294 ${rmdupBam} > ${output_dir}/fusion/${name}.discordants.bam # Extract the split-read alignments samtools view -h ${rmdupBam} \ | /dataseq/jmdna/software/lumpy-sv/scripts/extractSplitReads_BwaMem -i stdin \ | samtools view -Sb - \ - > ${outputDir}/fusion/${name}.splitters.bam + > ${output_dir}/fusion/${name}.splitters.bam lumpyexpress \ -B ${rmdupBam} \ - -S ${outputDir}/fusion/${name}.splitters.bam \ - -D ${outputDir}/fusion/${name}.discordants.bam \ - -o ${outputDir}/fusion/${name}.fusion.vcf + -S ${output_dir}/fusion/${name}.splitters.bam \ + -D ${output_dir}/fusion/${name}.discordants.bam \ + -o ${output_dir}/fusion/${name}.fusion.vcf - perl ${codesDir}/fusion.filter.pl ${outputDir}/fusion/${name}.fusion.vcf ${outputDir}/fusion/${name}.fusion.filter.vcf + perl ${codesDir}/fusion.filter.pl ${output_dir}/fusion/${name}.fusion.vcf ${output_dir}/fusion/${name}.fusion.filter.vcf svtyper \ -B ${rmdupBam} \ - -i ${outputDir}/fusion/${name}.fusion.filter.vcf \ + -i ${output_dir}/fusion/${name}.fusion.filter.vcf \ -T ${ref} \ - -o ${outputDir}/fusion/${name}.fusion.gt.vcf + -o ${output_dir}/fusion/${name}.fusion.gt.vcf table_annovar.pl \ - ${outputDir}/fusion/${name}.fusion.gt.vcf \ + ${output_dir}/fusion/${name}.fusion.gt.vcf \ /dataseq/jmdna/software/annovar/humandb/ \ -buildver hg19 -nastring . -vcfinput -remove -otherinfo \ -protocol refGene \ -operation g \ - --outfile ${outputDir}/fusion/${name}.fusion + --outfile ${output_dir}/fusion/${name}.fusion - perl ${codesDir}/fusion.reanno.pl ${tumor_bamdst_depth} ${outputDir} ${name} - perl /home/jm001/test_duantao/database_update/codes/682/fusion_targetTherapy.pl ${codesDir} ${name} ${outputDir} ${project} ${cancer} + perl ${codesDir}/fusion.reanno.pl ${tumor_bamdst_depth} ${output_dir} ${name} + perl /home/jm001/test_duantao/database_update/codes/682/fusion_targetTherapy.pl ${codesDir} ${name} ${output_dir} ${project} ${cancer} >>> output { - String fusion = "${outputDir}/fusion/${name}.fusion.pos.txt" + String fusion = "${output_dir}/fusion/${name}.fusion.pos.txt" } } @@ -195,7 +157,7 @@ task tumor_content { String tumor_pileup String normal_pileup String ref - String outputDir + String output_dir String codesDir String gc_wiggle = "/dataseq/jmdna/codes/pancancer_controlsample/hg19.gc200Base.txt.gz" @@ -206,16 +168,16 @@ task tumor_content { -F ${ref} \ -n ${normal_pileup} \ -t ${tumor_pileup} \ - | gzip > ${outputDir}/qc/target_${name}.200base.seqz.gz + | gzip > ${output_dir}/qc/target_${name}.200base.seqz.gz - sequenza-utils seqz_binning -w 200 -s ${outputDir}/qc/target_${name}.200base.seqz.gz \ - | gzip > ${outputDir}/qc/target_${name}.200base.small.seqz.gz + sequenza-utils seqz_binning -w 200 -s ${output_dir}/qc/target_${name}.200base.seqz.gz \ + | gzip > ${output_dir}/qc/target_${name}.200base.small.seqz.gz - Rscript ${codesDir}/sequenza.R ${name} ${outputDir}/qc/target_${name}.200base.small.seqz.gz ${outputDir}/qc/sequenza || echo "sequenza failed!" + Rscript ${codesDir}/sequenza.R ${name} ${output_dir}/qc/target_${name}.200base.small.seqz.gz ${output_dir}/qc/sequenza || echo "sequenza failed!" >>> output { - String purity = "${outputDir}/qc/sequenza/${name}_CP_contours.pdf" + String purity = "${output_dir}/qc/sequenza/${name}_CP_contours.pdf" } } @@ -228,7 +190,7 @@ task cnvkit { String normal_rmdupBam String ref String bed - String outputDir + String output_dir String cancer String codesDir String project @@ -237,8 +199,8 @@ task cnvkit { command <<< - if [ ! -d ${outputDir}/cnvkit ];then - mkdir ${outputDir}/cnvkit + if [ ! -d ${output_dir}/cnvkit ];then + mkdir ${output_dir}/cnvkit fi cnvkit.py batch \ @@ -247,149 +209,128 @@ task cnvkit { --targets ${bed} \ --fasta ${ref} \ --access ${accessBed} \ - --output-reference ${outputDir}/cnvkit/${normal}_reference.cnn \ + --output-reference ${output_dir}/cnvkit/${normal}_reference.cnn \ --annotate ${annotateGene} \ - --drop-low-coverage --scatter --output-dir ${outputDir}/cnvkit + --drop-low-coverage --scatter --output-dir ${output_dir}/cnvkit cnvkit.py scatter \ - ${outputDir}/cnvkit/${tumor}.rmdup.cnr -s ${outputDir}/cnvkit/${tumor}.rmdup.cns \ + ${output_dir}/cnvkit/${tumor}.rmdup.cnr -s ${output_dir}/cnvkit/${tumor}.rmdup.cns \ --y-max 3 --y-min -3 \ --title ${tumor}.cns \ - -o ${outputDir}/cnvkit/${tumor}.cnv.png + -o ${output_dir}/cnvkit/${tumor}.cnv.png - perl ${codesDir}/log2_cn.pl ${outputDir}/cnvkit/${tumor}.rmdup.cns ${outputDir}/cnvkit/${tumor}.rmdup.cns.cn - perl /home/jm001/test_duantao/database_update/codes/682/cnv_targetTherapy.pl ${codesDir} ${tumor} ${outputDir} ${project} ${cancer} + perl ${codesDir}/log2_cn.pl ${output_dir}/cnvkit/${tumor}.rmdup.cns ${output_dir}/cnvkit/${tumor}.rmdup.cns.cn + perl /home/jm001/test_duantao/database_update/codes/682/cnv_targetTherapy.pl ${codesDir} ${tumor} ${output_dir} ${project} ${cancer} >>> output { - String cns = "${outputDir}/cnvkit/${tumor}.rmdup.cns" - String png = "${outputDir}/cnvkit/${tumor}.cnv.png" + String cns = "${output_dir}/cnvkit/${tumor}.rmdup.cns" + String png = "${output_dir}/cnvkit/${tumor}.cnv.png" } } task chemo { String codesDir - String outputDir + String output_dir String project String normal String rmdupBam command <<< - if [ ! -d ${outputDir}/chemo ];then - mkdir ${outputDir}/chemo + if [ ! -d ${output_dir}/chemo ];then + mkdir ${output_dir}/chemo fi - ${codesDir}/chemo/chemo_panel.py -p ${project} -o ${outputDir} --n ${normal} + ${codesDir}/chemo/chemo_panel.py -p ${project} -o ${output_dir} --n ${normal} >>> } task msi { String bed String name - String outputDir + String output_dir String tumor_rmdupBam String normal_rmdupBam command <<< - if [ ! -d ${outputDir}/msi ];then - mkdir ${outputDir}/msi + if [ ! -d ${output_dir}/msi ];then + mkdir ${output_dir}/msi fi msisensor2 msi -d /dataseq/jmdna/software/msisensor2/hg19.microsatellites.list \ -n ${normal_rmdupBam} \ -t ${tumor_rmdupBam} \ - -e ${bed} -b 10 -o ${outputDir}/msi/${name}.msi + -e ${bed} -b 10 -o ${output_dir}/msi/${name}.msi >>> output { - String target="${outputDir}/MSI/${name}.msi" + String target="${output_dir}/MSI/${name}.msi" } } task hla { String inputDir - String outputDir + String output_dir String normal command <<< - if [ ! -d ${outputDir}/neoantigen ];then - mkdir -p ${outputDir}/neoantigen/HLA + if [ ! -d ${output_dir}/neoantigen ];then + mkdir -p ${output_dir}/neoantigen/HLA fi razers3 -tc 10 -i 95 -m 1 -dr 0 \ - -o ${outputDir}/neoantigen/HLA/fished_1.bam /dataseq/jmdna/software/OptiType-1.3.5/data/hla_reference_dna.fasta \ + -o ${output_dir}/neoantigen/HLA/fished_1.bam /dataseq/jmdna/software/OptiType-1.3.5/data/hla_reference_dna.fasta \ ${inputDir}/*_${normal}_*1.fq.gz - samtools bam2fq ${outputDir}/neoantigen/HLA/fished_1.bam > ${outputDir}/neoantigen/HLA/${normal}_1_fished.fastq - rm ${outputDir}/neoantigen/HLA/fished_1.bam + samtools bam2fq ${output_dir}/neoantigen/HLA/fished_1.bam > ${output_dir}/neoantigen/HLA/${normal}_1_fished.fastq + rm ${output_dir}/neoantigen/HLA/fished_1.bam razers3 -tc 10 -i 95 -m 1 -dr 0 \ - -o ${outputDir}/neoantigen/HLA/fished_2.bam /dataseq/jmdna/software/OptiType-1.3.5/data/hla_reference_dna.fasta \ + -o ${output_dir}/neoantigen/HLA/fished_2.bam /dataseq/jmdna/software/OptiType-1.3.5/data/hla_reference_dna.fasta \ ${inputDir}/*_${normal}_*2.fq.gz - samtools bam2fq ${outputDir}/neoantigen/HLA/fished_2.bam > ${outputDir}/neoantigen/HLA/${normal}_2_fished.fastq - rm ${outputDir}/neoantigen/HLA/fished_2.bam + samtools bam2fq ${output_dir}/neoantigen/HLA/fished_2.bam > ${output_dir}/neoantigen/HLA/${normal}_2_fished.fastq + rm ${output_dir}/neoantigen/HLA/fished_2.bam /dataseq/jmdna/software/OptiType-1.3.5/OptiTypePipeline.py \ - -i ${outputDir}/neoantigen/HLA/${normal}_1_fished.fastq ${outputDir}/neoantigen/HLA/${normal}_2_fished.fastq \ - --dna -v --prefix ${normal} -o ${outputDir}/neoantigen/HLA/ + -i ${output_dir}/neoantigen/HLA/${normal}_1_fished.fastq ${output_dir}/neoantigen/HLA/${normal}_2_fished.fastq \ + --dna -v --prefix ${normal} -o ${output_dir}/neoantigen/HLA/ >>> output { - String hla = "${outputDir}/neoantigen/HLA/${normal}_result.tsv" + String hla = "${output_dir}/neoantigen/HLA/${normal}_result.tsv" } } task neoantigen { String codesDir - String outputDir + String output_dir String name String normal String somatic_hc_vcf String hla command <<< - sh /home/jm001/test_duantao/database_update/test_project/20230814_test/predict_neoantigen.sh ${outputDir} ${name} ${name} ${codesDir} + sh /home/jm001/test_duantao/database_update/test_project/20230814_test/predict_neoantigen.sh ${output_dir} ${name} ${name} ${codesDir} >>> output { - String neoantigen = "${outputDir}/neoantigen/MHC_Class_I/${name}.all_epitopes.netchop.txt" - } -} - -task dealwithsnvindel { - String codesDir - String name - String somatic_all_anno - String germline_anno - String project - String outputDir - String cancer - - command <<< - perl ${codesDir}/pick_variant.pl ${outputDir} ${name} - perl ${codesDir}/pick_mut_splice_promoter.pl ${codesDir} ${name} ${outputDir} ${project} - perl /home/jm001/test_duantao/database_update/codes/682/targetTherapy.pl ${name} ${outputDir} ${project} ${cancer} - perl /home/jm001/test_duantao/database_update/codes/682/germline_targetTherapy.pl ${name} ${outputDir} ${project} ${cancer} - >>> - output { - String snvindel_filtered= "${outputDir}/mutation/${name}.snp.indel.Somatic.annoall.hg19_multianno_filtered.txt" - String germline_filtered = "${outputDir}/mutation/${name}.snp.indel.Germline.anno.hg19_multianno_filtered.txt" + String neoantigen = "${output_dir}/neoantigen/MHC_Class_I/${name}.all_epitopes.netchop.txt" } } task hereditary { String codesDir String name - String outputDir + String output_dir String project String germline_filtered command <<< - ${codesDir}/hereditary/hereditary.py -p ${project} -o ${outputDir} --n ${name} + ${codesDir}/hereditary/hereditary.py -p ${project} -o ${output_dir} --n ${name} >>> output { - String hereditary_pre = "${outputDir}/hereditary/${name}.hereditary.pre.txt" + String hereditary_pre = "${output_dir}/hereditary/${name}.hereditary.pre.txt" } } @@ -398,101 +339,101 @@ task conpair { String name String tumor_rmdupBam String normal_rmdupBam - String outputDir + String output_dir String ref command <<< - if [ ! -d ${outputDir}/conpair ];then - mkdir -p ${outputDir}/conpair + if [ ! -d ${output_dir}/conpair ];then + mkdir -p ${output_dir}/conpair fi python3 /dataseq/jmdna/software/Conpair-master/scripts/run_gatk_pileup_for_sample.py \ -M /dataseq/jmdna/software/Conpair-master/data/markers/GRCh37.autosomes.phase3_shapeit2_mvncall_integrated.20130502.SNV.genotype.sselect_v4_MAF_0.4_LD_0.8.bed \ -B ${tumor_rmdupBam} \ - -O ${outputDir}/conpair/${name}.tumor.gatk.mpileup \ + -O ${output_dir}/conpair/${name}.tumor.gatk.mpileup \ -R ${ref} \ -G /dataseq/jmdna/software/GenomeAnalysisTK.3.7.jar python3 /dataseq/jmdna/software/Conpair-master/scripts/run_gatk_pileup_for_sample.py \ -M /dataseq/jmdna/software/Conpair-master/data/markers/GRCh37.autosomes.phase3_shapeit2_mvncall_integrated.20130502.SNV.genotype.sselect_v4_MAF_0.4_LD_0.8.bed \ -B ${normal_rmdupBam} \ - -O ${outputDir}/conpair/${name}.normal.gatk.mpileup \ + -O ${output_dir}/conpair/${name}.normal.gatk.mpileup \ -R ${ref} \ -G /dataseq/jmdna/software/GenomeAnalysisTK.3.7.jar - sed -i 's/^chr//g' ${outputDir}/conpair/${name}.tumor.gatk.mpileup - sed -i 's/^chr//g' ${outputDir}/conpair/${name}.normal.gatk.mpileup + sed -i 's/^chr//g' ${output_dir}/conpair/${name}.tumor.gatk.mpileup + sed -i 's/^chr//g' ${output_dir}/conpair/${name}.normal.gatk.mpileup python3 /dataseq/jmdna/software/Conpair-master/scripts/verify_concordance.py \ -H \ - -T ${outputDir}/conpair/${name}.tumor.gatk.mpileup \ - -N ${outputDir}/conpair/${name}.normal.gatk.mpileup \ - -O ${outputDir}/conpair/${name}_concordance.txt + -T ${output_dir}/conpair/${name}.tumor.gatk.mpileup \ + -N ${output_dir}/conpair/${name}.normal.gatk.mpileup \ + -O ${output_dir}/conpair/${name}_concordance.txt python3 /dataseq/jmdna/software/Conpair-master/scripts/estimate_tumor_normal_contamination.py \ - -T ${outputDir}/conpair/${name}.tumor.gatk.mpileup \ - -N ${outputDir}/conpair/${name}.normal.gatk.mpileup \ - -O ${outputDir}/conpair/${name}_contamination.txt + -T ${output_dir}/conpair/${name}.tumor.gatk.mpileup \ + -N ${output_dir}/conpair/${name}.normal.gatk.mpileup \ + -O ${output_dir}/conpair/${name}_contamination.txt >>> output { - String concordance = "${outputDir}/conpair/${name}_concordance.txt" - String contamination = "${outputDir}/conpair/${name}_contamination.txt" + String concordance = "${output_dir}/conpair/${name}_concordance.txt" + String contamination = "${output_dir}/conpair/${name}_contamination.txt" } } task mmr { String codesDir String name - String outputDir + String output_dir String germline_filtered command <<< - if [ ! -d ${outputDir}/MMR ];then - mkdir -p ${outputDir}/MMR + if [ ! -d ${output_dir}/MMR ];then + mkdir -p ${output_dir}/MMR fi - perl ${codesDir}/mmr_controlsample.pl ${outputDir} ${name} + perl ${codesDir}/mmr_controlsample.pl ${output_dir} ${name} >>> output { - String mmr = "${outputDir}/MMR/${name}_mmr.txt" + String mmr = "${output_dir}/MMR/${name}_mmr.txt" } } task hrr { String codesDir String name - String outputDir + String output_dir String germline_filtered command <<< - if [ ! -d ${outputDir}/HRR ];then - mkdir -p ${outputDir}/HRR + if [ ! -d ${output_dir}/HRR ];then + mkdir -p ${output_dir}/HRR fi - perl ${codesDir}/hrr_controlsample_tissue.pl ${outputDir} ${name} + perl ${codesDir}/hrr_controlsample_tissue.pl ${output_dir} ${name} >>> output { - String hrr = "${outputDir}/HRR/${name}_hrr.txt" + String hrr = "${output_dir}/HRR/${name}_hrr.txt" } } task hotspot { String name - String outputDir + String output_dir String ref String rmdupBam String codesDir command <<< - if [ ! -d ${outputDir}/mutation/hotspot/ ];then - mkdir -p ${outputDir}/mutation/hotspot/ + if [ ! -d ${output_dir}/mutation/hotspot/ ];then + mkdir -p ${output_dir}/mutation/hotspot/ fi samtools mpileup -Bq 20 -Q 20 \ -f ${ref} \ -l ${codesDir}/hotspot.bed \ - -o ${outputDir}/mutation/hotspot/${name}.hotspot.pileup \ + -o ${output_dir}/mutation/hotspot/${name}.hotspot.pileup \ ${rmdupBam} java -jar $VARSCAN mpileup2cns \ - ${outputDir}/mutation/hotspot/${name}.hotspot.pileup \ + ${output_dir}/mutation/hotspot/${name}.hotspot.pileup \ --min-var-freq 0.005 \ --min-avg-qual 20 \ --output-vcf 1 \ @@ -500,10 +441,10 @@ task hotspot { --p-value 0.99 \ --min-reads2 2 \ --strand-filter 0 \ - > ${outputDir}/mutation/hotspot/${name}.hotspot.L.snp.indel.vcf + > ${output_dir}/mutation/hotspot/${name}.hotspot.L.snp.indel.vcf java -jar $VARSCAN mpileup2cns \ - ${outputDir}/mutation/hotspot/${name}.hotspot.pileup \ + ${output_dir}/mutation/hotspot/${name}.hotspot.pileup \ --min-var-freq 0.01 \ --min-avg-qual 20 \ --output-vcf 1 \ @@ -511,32 +452,32 @@ task hotspot { --p-value 0.05 \ --min-reads2 3 \ --strand-filter 1 \ - > ${outputDir}/mutation/hotspot/${name}.hotspot.H.snp.indel.vcf + > ${output_dir}/mutation/hotspot/${name}.hotspot.H.snp.indel.vcf - perl ${codesDir}/hotspot.hvl.pl ${outputDir} ${name} + perl ${codesDir}/hotspot.hvl.pl ${output_dir} ${name} - if [ -e "${outputDir}/mutation/hotspot/${name}.hotspot.snp.indel.vcf" ]; then + if [ -e "${output_dir}/mutation/hotspot/${name}.hotspot.snp.indel.vcf" ]; then table_annovar.pl \ - ${outputDir}/mutation/hotspot/${name}.hotspot.snp.indel.vcf \ + ${output_dir}/mutation/hotspot/${name}.hotspot.snp.indel.vcf \ /dataseq/jmdna/software/annovar/humandb/ \ -buildver hg19 -nastring . -vcfinput -remove -otherinfo \ -protocol refGene \ -argument '-hgvs' \ -operation g \ - --outfile ${outputDir}/mutation/hotspot/${name}.hotspot.snp.indel.anno - perl ${codesDir}/hotspot.filter.pl ${outputDir} ${name} + --outfile ${output_dir}/mutation/hotspot/${name}.hotspot.snp.indel.anno + perl ${codesDir}/hotspot.filter.pl ${output_dir} ${name} fi >>> output { - String hotspot = "${outputDir}/mutation/hotspot/${name}.hotspot.H.snp.indel.vcf" + String hotspot = "${output_dir}/mutation/hotspot/${name}.hotspot.H.snp.indel.vcf" } } task auto_report { String cancer String codesDir - String outputDir + String output_dir String normal String tumor @@ -554,28 +495,28 @@ task auto_report { command <<< - if [ ! -d ${outputDir}/report ];then - mkdir -p ${outputDir}/report + if [ ! -d ${output_dir}/report ];then + mkdir -p ${output_dir}/report fi - perl /home/jm001/test_duantao/database_update/codes/682/indication.pl ${outputDir} ${cancer} - python3 ${codesDir}/drug_dedup.py ${outputDir} ${tumor} - perl ${codesDir}/file_format_change.pl ${outputDir} ${tumor} - python3 ${codesDir}/report_template/682gene_tissue_control_report.py ${outputDir} ${tumor} ${normal} ${cancer} + perl /home/jm001/test_duantao/database_update/codes/682/indication.pl ${output_dir} ${cancer} + python3 ${codesDir}/drug_dedup.py ${output_dir} ${tumor} + perl ${codesDir}/file_format_change.pl ${output_dir} ${tumor} + python3 ${codesDir}/report_template/682gene_tissue_control_report.py ${output_dir} ${tumor} ${normal} ${cancer} - ln -s ${cnv_cns} ${outputDir}/report/ - ln -s ${cnv_png} ${outputDir}/report/ + ln -s ${cnv_cns} ${output_dir}/report/ + ln -s ${cnv_png} ${output_dir}/report/ - ln -s ${fusion_pos} ${outputDir}/report/ - ln -s ${snvindel_filtered} ${outputDir}/report/ + ln -s ${fusion_pos} ${output_dir}/report/ + ln -s ${snvindel_filtered} ${output_dir}/report/ - ln -s ${tmb} ${outputDir}/report/ + ln -s ${tmb} ${output_dir}/report/ - ln -s ${mmr} ${outputDir}/report/ + ln -s ${mmr} ${output_dir}/report/ - ln -s ${hrr} ${outputDir}/report/ + ln -s ${hrr} ${output_dir}/report/ - ln -s ${hereditary_pre} ${outputDir}/report/ + ln -s ${hereditary_pre} ${output_dir}/report/ >>> } \ No newline at end of file