# SNV/indel calling pipeline (WDL draft-2 syntax).
#
# Tasks:
#   mutation_calling_umi             single-sample, UMI library
#   mutation_calling_tissue          single-sample, non-UMI library
#   mutation_calling_tissue_control  tumor/normal pair, non-UMI library
#   mutation_calling_umi_control     tumor/normal pair, UMI library
#   annovar                          table_annovar.pl annotation of a VCF
#   filter_umi / filter_tissue       post-annotation filtering
#
# All tasks write into <output_dir>/mutation and return the paths of the
# files they produce as plain Strings (not File), so nothing is localized.


# Single-sample variant calling for a UMI library.
#
# Inputs:
#   name        sample name; used for -N and as the output file prefix
#   output_dir  pipeline output root
#   rmdup_bam   de-duplicated BAM used for the 1r call and the 2r extraction
#   ref         reference passed to VarDict -G and to correct_genome_3rule.py
#   bed         target regions passed to VarDict
# Outputs:
#   somatic_vcf   <output_dir>/mutation/<name>.snp.indel.somatic.vcf
#   germline_vcf  <output_dir>/mutation/<name>.snp.indel.germline.vcf
task mutation_calling_umi {
    String name
    String output_dir
    String rmdup_bam
    String ref
    String bed

    command <<<
        # mkdir -p also creates missing parents and is a no-op when the
        # directories already exist.
        mkdir -p ${output_dir}/mutation ${output_dir}/alignment

        # 1-read (1r) call.
        # This covers UMI families that contain a single read; discarding those
        # reads would shrink the data considerably.
        # -r 3 requires three supporting reads.
        # -f is the allele-frequency threshold; frequencies obtained from the
        # 2-read call can be trusted more than those from the 1-read call.
        java -jar /dataseq/jmdna/software/VarDict-1.8.3/lib/VarDict-1.8.3.jar \
            -G ${ref} \
            -f 0.001 \
            -N ${name} \
            -b ${rmdup_bam} \
            -UN -Q 20 -m 3 -r 3 -th 10 -z 1 -c 1 -S 2 -E 3 -g 4 ${bed} \
            | /dataseq/jmdna/software/VarDict-1.8.3/bin/teststrandbias.R \
            | /dataseq/jmdna/software/VarDict-1.8.3/bin/var2vcf_valid.pl \
            -N ${name} -E -f 0.001 > ${output_dir}/mutation/${name}.1r.snp.indel.vcf

        # Extract the sequences that were corrected from >= 2 reads.
        # The declared rmdup_bam input is used here so that the 2r subset is
        # derived from the same BAM as the 1r call above.
        bam_fetch.py ${rmdup_bam} ${output_dir}/alignment/${name}.2r.rmdup.bam
        samtools index ${output_dir}/alignment/${name}.2r.rmdup.bam

        # Ensures that a mutation called in 1r mode has a UMI family with 2 reads.
        # 2-read (2r) corrected call.
        java -jar /dataseq/jmdna/software/VarDict-1.8.3/lib/VarDict-1.8.3.jar -G ${ref} \
            -f 0.0001 -N ${name}_2r -b ${output_dir}/alignment/${name}.2r.rmdup.bam \
            -UN -Q 20 -m 3 -r 1 -th 10 -c 1 -S 2 -E 3 -g 4 ${bed} \
            | /dataseq/jmdna/software/VarDict-1.8.3/bin/teststrandbias.R \
            | /dataseq/jmdna/software/VarDict-1.8.3/bin/var2vcf_valid.pl -N ${name} -E -f 0.001 \
            > ${output_dir}/mutation/${name}.2r.snp.indel.vcf

        # Merge: 1r calls with AF > 0.01, plus the 2r calls used to correct the
        # low-frequency range (AF < 0.01) of the 1r calls.
        correct_umi_1r_plus_2r.pl \
            ${output_dir}/mutation/${name}.1r.snp.indel.vcf \
            ${output_dir}/mutation/${name}.2r.snp.indel.vcf \
            ${output_dir}/mutation/${name}.snp.indel.raw.vcf

        correct_genome_3rule.py \
            ${output_dir}/mutation/${name}.snp.indel.raw.vcf \
            ${output_dir}/mutation/${name}.snp.indel.vcf \
            ${ref}

        # Without a matched normal the full call set is exported as "somatic";
        # the "germline" set is the subset passing INFO/AF[0] > 0.1.
        cp ${output_dir}/mutation/${name}.snp.indel.vcf ${output_dir}/mutation/${name}.snp.indel.somatic.vcf

        python ~/project/pipeline/workflow/script/tools/vcf_filter.py \
            -i ${output_dir}/mutation/${name}.snp.indel.somatic.vcf \
            -e 'INFO/AF[0] > 0.1' \
            -o ${output_dir}/mutation/${name}.snp.indel.germline.vcf
    >>>

    output {
        String somatic_vcf = "${output_dir}/mutation/${name}.snp.indel.somatic.vcf"
        String germline_vcf = "${output_dir}/mutation/${name}.snp.indel.germline.vcf"
    }
}


# Single-sample variant calling for a non-UMI (tissue) library.
#
# Inputs:
#   name        sample name; used for -N and as the output file prefix
#   bed         target regions passed to VarDict
#   ref         reference passed to VarDict -G and to correct_genome_3rule.py
#   output_dir  pipeline output root
#   rmdup_bam   de-duplicated BAM to call on
# Outputs:
#   somatic_vcf   <output_dir>/mutation/<name>.snp.indel.somatic.vcf
#   germline_vcf  <output_dir>/mutation/<name>.snp.indel.germline.vcf
task mutation_calling_tissue {
    String name
    String bed
    String ref
    String output_dir
    String rmdup_bam

    command <<<
        mkdir -p ${output_dir}/mutation

        # VarDict single-sample call
        java -jar /dataseq/jmdna/software/VarDict-1.8.3/lib/VarDict-1.8.3.jar \
            -G ${ref} \
            -f 0.01 \
            -N ${name} \
            -b ${rmdup_bam} \
            -UN \
            -Q 20 \
            -m 3 \
            -r 3 \
            -z 1 \
            -th 10 \
            -c 1 -S 2 -E 3 -g 4 ${bed} \
            | /dataseq/jmdna/software/VarDict-1.8.3/bin/teststrandbias.R \
            | /dataseq/jmdna/software/VarDict-1.8.3/bin/var2vcf_valid.pl -N ${name} -E -f 0.01 \
            > ${output_dir}/mutation/${name}.snp.indel.raw.vcf

        correct_genome_3rule.py \
            ${output_dir}/mutation/${name}.snp.indel.raw.vcf \
            ${output_dir}/mutation/${name}.snp.indel.vcf \
            ${ref}

        # Without a matched normal the full call set is exported as "somatic";
        # the "germline" set is the subset passing INFO/AF[0] > 0.1.
        cp ${output_dir}/mutation/${name}.snp.indel.vcf ${output_dir}/mutation/${name}.snp.indel.somatic.vcf

        python ~/project/pipeline/workflow/script/tools/vcf_filter.py \
            -i ${output_dir}/mutation/${name}.snp.indel.somatic.vcf \
            -e 'INFO/AF[0] > 0.1' \
            -o ${output_dir}/mutation/${name}.snp.indel.germline.vcf
    >>>

    output {
        String somatic_vcf = "${output_dir}/mutation/${name}.snp.indel.somatic.vcf"
        # Must match the -o path given to vcf_filter.py above.
        String germline_vcf = "${output_dir}/mutation/${name}.snp.indel.germline.vcf"
    }
}


# Paired tumor/normal variant calling for a non-UMI (tissue) library.
#
# Inputs:
#   name              sample name; used for -N and as the output file prefix
#   bed               target regions passed to VarDict
#   ref               reference passed to VarDict -G and correct_genome_3rule.py
#   output_dir        pipeline output root
#   tumor_rmdup_bam   de-duplicated tumor BAM
#   normal_rmdup_bam  de-duplicated normal BAM
# Outputs:
#   somatic_vcf   <output_dir>/mutation/<name>.snp.indel.somatic.vcf
#   germline_vcf  <output_dir>/mutation/<name>.snp.indel.germline.vcf
task mutation_calling_tissue_control {
    String name
    String bed
    String ref
    String output_dir
    String tumor_rmdup_bam
    String normal_rmdup_bam

    command <<<
        mkdir -p ${output_dir}/mutation

        # Paired call: both BAMs are given to -b as "tumor|normal".
        java -jar /dataseq/jmdna/software/VarDict-1.8.3/lib/VarDict-1.8.3.jar \
            -G ${ref} \
            -f 0.01 \
            -N ${name} \
            -b "${tumor_rmdup_bam}|${normal_rmdup_bam}" \
            -UN \
            -Q 20 \
            -m 3 \
            -r 3 \
            -th 20 \
            -z 1 -c 1 -S 2 -E 3 -g 4 ${bed} \
            | /dataseq/jmdna/software/VarDict-1.8.3/bin/testsomatic.R \
            | /dataseq/jmdna/software/VarDict-1.8.3/bin/var2vcf_paired.pl -N ${name} -f 0.01 \
            > ${output_dir}/mutation/${name}.snp.indel.raw.vcf

        correct_genome_3rule.py \
            ${output_dir}/mutation/${name}.snp.indel.raw.vcf \
            ${output_dir}/mutation/${name}.snp.indel.vcf \
            ${ref}

        # Germline set: records whose INFO/STATUS is "Germline".
        python ~/project/pipeline/workflow/script/tools/vcf_filter.py \
            -i ${output_dir}/mutation/${name}.snp.indel.vcf \
            -o ${output_dir}/mutation/${name}.snp.indel.germline.vcf \
            -e 'INFO/STATUS="Germline"'

        # Somatic set: StrongSomatic, or LikelySomatic where the first sample's
        # AF exceeds three times the second sample's AF.
        python ~/project/pipeline/workflow/script/tools/vcf_filter.py \
            -i ${output_dir}/mutation/${name}.snp.indel.vcf \
            -o ${output_dir}/mutation/${name}.snp.indel.somatic.vcf \
            -e 'INFO/STATUS="StrongSomatic" | ( INFO/STATUS="LikelySomatic" && FORMAT/AF[0] > 3*FORMAT/AF[1] )'
    >>>

    output {
        String somatic_vcf = "${output_dir}/mutation/${name}.snp.indel.somatic.vcf"
        String germline_vcf = "${output_dir}/mutation/${name}.snp.indel.germline.vcf"
    }
}


# Paired tumor/normal variant calling for a UMI library.
#
# The normal sample is called on its own to obtain the germline set; the tumor
# sample goes through the 1r + 2r procedure and then has the normal's variant
# sites removed.
#
# Inputs:
#   name              sample name; used for -N and as the output file prefix
#   bed               target regions passed to VarDict
#   ref               reference passed to VarDict -G and correct_genome_3rule.py
#   output_dir        pipeline output root
#   tumor_rmdup_bam   de-duplicated tumor BAM
#   normal_rmdup_bam  de-duplicated normal BAM
# Outputs:
#   somatic_vcf   <output_dir>/mutation/<name>.snp.indel.somatic.vcf
#   germline_vcf  <output_dir>/mutation/<name>.snp.indel.germline.vcf
task mutation_calling_umi_control {
    String name
    String bed
    String ref
    String output_dir
    String tumor_rmdup_bam
    String normal_rmdup_bam

    command <<<
        mkdir -p ${output_dir}/mutation ${output_dir}/alignment

        # Control (normal) sample
        java -jar /dataseq/jmdna/software/VarDict-1.8.3/lib/VarDict-1.8.3.jar \
            -G ${ref} \
            -f 0.01 \
            -N ${name} \
            -b ${normal_rmdup_bam} \
            -UN \
            -Q 20 \
            -m 3 \
            -r 3 \
            -th 10 \
            -c 1 -S 2 -E 3 -g 4 ${bed} \
            | /dataseq/jmdna/software/VarDict-1.8.3/bin/teststrandbias.R \
            | /dataseq/jmdna/software/VarDict-1.8.3/bin/var2vcf_valid.pl -N ${name} -E -f 0.01 \
            > ${output_dir}/mutation/${name}.snp.indel.raw_germline.vcf

        # Test (tumor) sample, 1-read (1r) call
        java -jar /dataseq/jmdna/software/VarDict-1.8.3/lib/VarDict-1.8.3.jar \
            -G ${ref} \
            -f 0.001 \
            -N ${name} \
            -b ${tumor_rmdup_bam} \
            -UN -Q 20 -m 3 -r 3 -th 10 -c 1 -S 2 -E 3 -g 4 ${bed} \
            | /dataseq/jmdna/software/VarDict-1.8.3/bin/teststrandbias.R \
            | /dataseq/jmdna/software/VarDict-1.8.3/bin/var2vcf_valid.pl \
            -N ${name} -E -f 0.001 > ${output_dir}/mutation/${name}.1r.snp.indel.vcf

        # Extract the sequences that were corrected from >= 2 reads.
        # The declared tumor_rmdup_bam input is used here so that the 2r subset
        # is derived from the same BAM as the 1r call above.
        # NOTE(review): the single-sample UMI task calls bam_fetch.py for this
        # step; confirm that func_fetch_bam.py is the intended script here.
        func_fetch_bam.py ${tumor_rmdup_bam} ${output_dir}/alignment/${name}.2r.rmdup.bam
        samtools index ${output_dir}/alignment/${name}.2r.rmdup.bam

        # Ensures that a mutation called in 1r mode has a UMI family with 2 reads.
        # 2-read (2r) corrected call.
        java -jar /dataseq/jmdna/software/VarDict-1.8.3/lib/VarDict-1.8.3.jar -G ${ref} \
            -f 0.0001 -N ${name}_2r -b ${output_dir}/alignment/${name}.2r.rmdup.bam \
            -UN -Q 20 -m 3 -r 1 -th 10 -c 1 -S 2 -E 3 -g 4 ${bed} \
            | /dataseq/jmdna/software/VarDict-1.8.3/bin/teststrandbias.R \
            | /dataseq/jmdna/software/VarDict-1.8.3/bin/var2vcf_valid.pl -N ${name} -E -f 0.001 \
            > ${output_dir}/mutation/${name}.2r.snp.indel.vcf

        # Merge: 1r calls with AF > 0.01, plus the 2r calls used to correct the
        # low-frequency range (AF < 0.01) of the 1r calls.
        # NOTE(review): the single-sample UMI task uses correct_umi_1r_plus_2r.pl
        # for this step; confirm which script name is current.
        filter_snpindel_umi_1r_plus_2r.pl \
            ${output_dir}/mutation/${name}.1r.snp.indel.vcf \
            ${output_dir}/mutation/${name}.2r.snp.indel.vcf \
            ${output_dir}/mutation/${name}.snp.indel.pre_raw.vcf

        # Remove variant sites that are present in the normal sample.
        correct_umi_subnormal.pl \
            ${output_dir}/mutation/${name}.snp.indel.pre_raw.vcf \
            ${output_dir}/mutation/${name}.snp.indel.raw_germline.vcf \
            ${output_dir}/mutation/${name}.snp.indel.raw_somaitc.vcf

        correct_genome_3rule.py \
            ${output_dir}/mutation/${name}.snp.indel.raw_germline.vcf \
            ${output_dir}/mutation/${name}.snp.indel.germline.vcf \
            ${ref}

        correct_genome_3rule.py \
            ${output_dir}/mutation/${name}.snp.indel.raw_somaitc.vcf \
            ${output_dir}/mutation/${name}.snp.indel.somatic.vcf \
            ${ref}
    >>>

    output {
        String somatic_vcf = "${output_dir}/mutation/${name}.snp.indel.somatic.vcf"
        String germline_vcf = "${output_dir}/mutation/${name}.snp.indel.germline.vcf"
    }
}


# Annotate a VCF with ANNOVAR (table_annovar.pl, hg19 build).
#
# Inputs:
#   prefix      output file prefix inside <output_dir>/mutation
#   output_dir  pipeline output root
#   ref         accepted for interface symmetry; not used by the command
#   vcf         VCF to annotate
# Outputs:
#   anno  <output_dir>/mutation/<prefix>.hg19_multianno.txt
task annovar {
    String prefix
    String output_dir
    String ref
    String vcf

    command <<<
        mkdir -p ${output_dir}/mutation

        # -protocol, -operation and -argument each carry 11 comma-separated
        # entries; keep the three lists aligned when adding or removing one.
        table_annovar.pl \
            ${vcf} \
            /dataseq/jmdna/software/annovar/humandb/ \
            -buildver hg19 -nastring . -vcfinput -remove -otherinfo \
            -protocol refGene,avsnp150,cosmic91,clinvar_20220320,1000g2015aug_all,1000g2015aug_eas,esp6500siv2_all,exac03nontcga,gnomad_genome,dbnsfp35c,cytoBand \
            -argument '-splicing_threshold 2 -hgvs',,,,,,,,,, \
            -intronhgvs 50 \
            -operation g,f,f,f,f,f,f,f,f,f,r \
            -outfile ${output_dir}/mutation/${prefix} \
            -dot2underline
    >>>

    output {
        String anno = "${output_dir}/mutation/${prefix}.hg19_multianno.txt"
    }
}


# Filter annotated variants of a UMI sample.
#
# Inputs:
#   name             sample name; used as the output file prefix
#   anno             annotation table to filter
#   project          second positional argument of the filter script
#   output_dir       pipeline output root
#   tumor_rmdup_bam  BAM handed to the overlap-read correction step
# Outputs (paths of the files the command writes):
#   snvindel_filtered  somatic table after overlap-read correction
#   germline_filtered  germline table written by the filter script
task filter_umi {
    String name
    String anno
    String project
    String output_dir
    String tumor_rmdup_bam

    command <<<
        mkdir -p ${output_dir}/mutation

        # Same script and argument layout as in filter_tissue; the third
        # argument is "c" here and "t" there.
        filter_snpindel.pl \
            ${anno} \
            ${project} \
            c \
            ${output_dir}/mutation/${name}.snp.indel.somatic.hg19_multianno.filtered.pre.txt \
            ${output_dir}/mutation/${name}.snp.indel.germline.hg19_multianno.filtered.pre.txt \
            ${output_dir}/mutation/${name}.snp.indel.hg19_multianno.tag.txt

        # The input path must be the somatic file written by the step above.
        filter_snpindel_umi_correct_overlap_reads.py \
            ${output_dir}/mutation/${name}.snp.indel.somatic.hg19_multianno.filtered.pre.txt \
            ${tumor_rmdup_bam} \
            ${output_dir}/mutation/${name}.snp.indel.somatic.hg19_multianno_filtered.txt
    >>>

    output {
        String snvindel_filtered = "${output_dir}/mutation/${name}.snp.indel.somatic.hg19_multianno_filtered.txt"
        String germline_filtered = "${output_dir}/mutation/${name}.snp.indel.germline.hg19_multianno.filtered.pre.txt"
    }
}


# Filter annotated variants of a non-UMI (tissue) sample.
#
# Inputs:
#   name             sample name; used as the output file prefix
#   anno             annotation table to filter
#   project          second positional argument of the filter script
#   output_dir       pipeline output root
#   tumor_rmdup_bam  accepted for interface symmetry; not used by the command
# Outputs (paths of the files the command writes):
#   snvindel_filtered  somatic table written by the filter script
#   germline_filtered  germline table written by the filter script
task filter_tissue {
    String name
    String anno
    String project
    String output_dir
    String tumor_rmdup_bam

    command <<<
        mkdir -p ${output_dir}/mutation

        filter_snpindel.pl \
            ${anno} \
            ${project} \
            t \
            ${output_dir}/mutation/${name}.snp.indel.somatic.hg19_multianno.filtered.txt \
            ${output_dir}/mutation/${name}.snp.indel.germline.hg19_multianno.filtered.txt \
            ${output_dir}/mutation/${name}.snp.indel.hg19_multianno.tag.txt
    >>>

    output {
        String snvindel_filtered = "${output_dir}/mutation/${name}.snp.indel.somatic.hg19_multianno.filtered.txt"
        String germline_filtered = "${output_dir}/mutation/${name}.snp.indel.germline.hg19_multianno.filtered.txt"
    }
}


# Entry point: choose the calling task from (normal present?, umi?) and
# annotate the resulting somatic and germline VCFs.
#
# Inputs:
#   tumor             tumor sample name (output file prefix)
#   tumor_rmdup_bam   de-duplicated tumor BAM
#   normal            optional normal sample name; its presence selects the
#                     paired branch
#   normal_rmdup_bam  optional de-duplicated normal BAM
#   umi               true for UMI libraries
#   output_dir        pipeline output root
#   ref               reference genome
#   bed               target regions
# Outputs:
#   somatic_vcf, germline_vcf  paths under <output_dir>/mutation
workflow call_mutation {
    String tumor
    String tumor_rmdup_bam
    String? normal
    String? normal_rmdup_bam
    Boolean umi
    String output_dir
    String ref
    String bed

    # Intended pipeline: mutation_calling => annovar => filter
    # NOTE(review): no filter_umi / filter_tissue call is wired in below.

    # Single sample
    if (!defined(normal)) {
        if (umi) {
            call mutation_calling_umi {
                input:
                    name=tumor,
                    output_dir=output_dir,
                    ref=ref,
                    bed=bed,
                    rmdup_bam=tumor_rmdup_bam
            }
            call annovar as anno_somatic_umi {
                input:
                    prefix="${tumor}.snp.indel.somatic",
                    output_dir=output_dir,
                    ref=ref,
                    vcf=mutation_calling_umi.somatic_vcf
            }
            call annovar as anno_germline_umi {
                input:
                    prefix="${tumor}.snp.indel.germline",
                    output_dir=output_dir,
                    ref=ref,
                    vcf=mutation_calling_umi.germline_vcf
            }
        }
        if (!umi) {
            call mutation_calling_tissue {
                input:
                    name=tumor,
                    output_dir=output_dir,
                    ref=ref,
                    bed=bed,
                    rmdup_bam=tumor_rmdup_bam
            }
            call annovar as anno_somatic_tissue {
                input:
                    prefix="${tumor}.snp.indel.somatic",
                    output_dir=output_dir,
                    ref=ref,
                    vcf=mutation_calling_tissue.somatic_vcf
            }
            call annovar as anno_germline_tissue {
                input:
                    prefix="${tumor}.snp.indel.germline",
                    output_dir=output_dir,
                    ref=ref,
                    vcf=mutation_calling_tissue.germline_vcf
            }
        }
    }

    # Paired sample
    # NOTE(review): the branch is selected on `normal`, but the tasks consume
    # the optional `normal_rmdup_bam`; callers must supply both together.
    if (defined(normal)) {
        if (umi) {
            call mutation_calling_umi_control {
                input:
                    name=tumor,
                    output_dir=output_dir,
                    ref=ref,
                    bed=bed,
                    tumor_rmdup_bam=tumor_rmdup_bam,
                    normal_rmdup_bam=normal_rmdup_bam
            }
            call annovar as anno_somatic_umi_control {
                input:
                    prefix="${tumor}.snp.indel.somatic",
                    output_dir=output_dir,
                    ref=ref,
                    vcf=mutation_calling_umi_control.somatic_vcf
            }
            call annovar as anno_germline_umi_control {
                input:
                    prefix="${tumor}.snp.indel.germline",
                    output_dir=output_dir,
                    ref=ref,
                    vcf=mutation_calling_umi_control.germline_vcf
            }
        }
        if (!umi) {
            call mutation_calling_tissue_control {
                input:
                    name=tumor,
                    output_dir=output_dir,
                    ref=ref,
                    bed=bed,
                    tumor_rmdup_bam=tumor_rmdup_bam,
                    normal_rmdup_bam=normal_rmdup_bam
            }
            call annovar as anno_somatic_tissue_control {
                input:
                    prefix="${tumor}.snp.indel.somatic",
                    output_dir=output_dir,
                    ref=ref,
                    vcf=mutation_calling_tissue_control.somatic_vcf
            }
            call annovar as anno_germline_tissue_control {
                input:
                    prefix="${tumor}.snp.indel.germline",
                    output_dir=output_dir,
                    ref=ref,
                    vcf=mutation_calling_tissue_control.germline_vcf
            }
        }
    }

    output {
        String somatic_vcf = "${output_dir}/mutation/${tumor}.snp.indel.somatic.vcf"
        String germline_vcf = "${output_dir}/mutation/${tumor}.snp.indel.germline.vcf"
    }
}