task mutation_calling_umi { String name String output_dir String rmdup_bam String ref String bed command <<< if [ ! -d ${output_dir}/mutation ];then mkdir ${output_dir}/mutation fi #1条call # 这个情况是reads数目只有1,但是如果去掉了这个reads数导致数据量减少很多 # -r 3 是指有3条这样样的reads支撑 # -f 是指频率 以2条方式的call出来的变异频率可以比1条的方式更可信 java -jar /dataseq/jmdna/software/VarDict-1.8.3/lib/VarDict-1.8.3.jar \ -G ${ref} \ -f 0.001 \ -N ${name} \ -b ${rmdup_bam} \ -UN -Q 20 -m 3 -r 3 -th 10 -c 1 -S 2 -E 3 -g 4 ${bed} \ | /dataseq/jmdna/software/VarDict-1.8.3/bin/teststrandbias.R \ | /dataseq/jmdna/software/VarDict-1.8.3/bin/var2vcf_valid.pl \ -N ${name} -E -f 0.001 > ${output_dir}/mutation/${name}.1r.snp.indel.vcf #提取>=2条矫正的序列 python3 /home/zhangchao/project/pipeline/control/script/fetch_bam.py ${output_dir}/alignment/${name}.rmdup.bam ${output_dir}/alignment/${name}.2r.rmdup.bam samtools index ${output_dir}/alignment/${name}.2r.rmdup.bam # 保证 1r call mut umi family 里面有2条reads #2条矫正的call java -jar /dataseq/jmdna/software/VarDict-1.8.3/lib/VarDict-1.8.3.jar -G ${ref} \ -f 0.0001 -N ${name}_2r -b ${output_dir}/alignment/${name}.2r.rmdup.bam \ -UN -Q 20 -m 3 -r 1 -th 10 -c 1 -S 2 -E 3 -g 4 ${bed} | /dataseq/jmdna/software/VarDict-1.8.3/bin/teststrandbias.R \ | /dataseq/jmdna/software/VarDict-1.8.3/bin/var2vcf_valid.pl -N ${name} -E -f 0.001 >${output_dir}/mutation/${name}.2r.snp.indel.vcf #merge突变,以1条方式call的>0.01的突变+两条方式的对一条方式的低频区域(AF<0.01)进行矫正。 perl /home/zhangchao/project/pipeline/control/script/1r_plus_2r.pl \ ${output_dir}/mutation/${name}.1r.snp.indel.vcf \ ${output_dir}/mutation/${name}.2r.snp.indel.vcf \ ${output_dir}/mutation/${name}.snp.indel.vcf table_annovar.pl \ ${output_dir}/mutation/${name}.snp.indel.vcf \ /dataseq/jmdna/software/annovar/humandb/ \ -buildver hg19 -nastring . -vcfinput -remove -otherinfo \ -protocol refGene,avsnp150,cosmic91,clinvar_20220320,1000g2015aug_all,1000g2015aug_eas,esp6500siv2_all,exac03nontcga,gnomad_genome,dbnsfp35c,cytoBand \ -argument '-splicing_threshold 2 -hgvs',,,,,,,,,, \ --intronhgvs 50 \ -operation g,f,f,f,f,f,f,f,f,f,r \ --outfile ${output_dir}/mutation/${name}.snp.indel.anno >>> output { String vcf = "${output_dir}/mutation/${name}.filter.flag.snp.indel.vcf" } } task mutation_calling_tissue { String name String bed String ref String output_dir String rmdup_bam command <<< if [ ! -d ${output_dir}/mutation ];then mkdir ${output_dir}/mutation fi # vardict java -jar /dataseq/jmdna/software/VarDict-1.8.3/lib/VarDict-1.8.3.jar \ -G ${ref} \ -f 0.01 \ -N ${name} \ -b ${rmdup_bam} \ -UN \ -Q 20 \ -m 3 \ -r 3 \ -th 10 \ -c 1 -S 2 -E 3 -g 4 ${bed} |/dataseq/jmdna/software/VarDict-1.8.3/bin/teststrandbias.R \ |/dataseq/jmdna/software/VarDict-1.8.3/bin/var2vcf_valid.pl -N ${name} -E -f 0.01 >${output_dir}/mutation/${name}.snp.indel.vcf table_annovar.pl \ ${output_dir}/mutation/${name}.snp.indel.vcf \ /dataseq/jmdna/software/annovar/humandb/ \ -buildver hg19 -nastring . -vcfinput -remove -otherinfo \ -protocol refGene,avsnp150,cosmic91,clinvar_20220320,1000g2015aug_all,1000g2015aug_eas,esp6500siv2_all,exac03nontcga,gnomad_genome,dbnsfp35c \ -argument '-splicing_threshold 2 -hgvs',,,,,,,,, \ -operation g,f,f,f,f,f,f,f,f,f \ --intronhgvs 50 \ --outfile ${output_dir}/mutation/${name}.snp.indel.anno >>> output { String vcf = "${output_dir}/mutation/${name}.snp.indel.vcf" } } task mutation_calling_tissue_control { String name String bed String ref String output_dir String tumor_rmdup_bam String normal_rmdup_bam command <<< if [ ! -d ${output_dir}/mutation ];then mkdir ${output_dir}/mutation fi java -jar /dataseq/jmdna/software/VarDict-1.8.3/lib/VarDict-1.8.3.jar \ -G ${ref} \ -f 0.01 \ -N ${name} \ -b "${tumor_rmdup_bam}|${normal_rmdup_bam}" \ -UN -Q 20 -m 3 -r 3 -th 20 -c 1 -S 2 -E 3 -g 4 ${bed} | /dataseq/jmdna/software/VarDict-1.8.3/bin/testsomatic.R \ | /dataseq/jmdna/software/VarDict-1.8.3/bin/var2vcf_paired.pl -N ${name} -f 0.01 > ${output_dir}/mutation/${name}.snp.indel.vcf table_annovar.pl \ ${output_dir}/mutation/${name}.snp.indel.vcf \ /dataseq/jmdna/software/annovar/humandb/ \ -buildver hg19 -nastring . -vcfinput -remove -otherinfo \ -protocol refGene,avsnp150,cosmic91,clinvar_20220320,1000g2015aug_all,1000g2015aug_eas,esp6500siv2_all,exac03nontcga,gnomad_genome,dbnsfp35c \ -argument '-splicing_threshold 2 -hgvs',,,,,,,,, \ -operation g,f,f,f,f,f,f,f,f,f \ --intronhgvs 50 \ --outfile ${output_dir}/mutation/${name}.snp.indel.anno >>> output { String vcf = "${output_dir}/mutation/${name}.snp.indel.vcf" } } workflow call_mutation { String tumor String tumor_rmdup_bam String? normal String? normal_rmdup_bam Boolean umi String output_dir String ref String bed scatter(name in [tumor, normal]) { if (defined(name)) { if (name==tumor) { if (umi) { call mutation_calling_umi as tumor_mutation_calling_umi { input: name=name, output_dir=output_dir, ref=ref, bed=bed, rmdup_bam=tumor_rmdup_bam } } if (!umi) { # 单样本模式,normal没有定义 if (name==select_first([normal, tumor])) { call mutation_calling_tissue as tumor_mutation_calling_tissue { input: name=name, output_dir=output_dir, ref=ref, bed=bed, rmdup_bam=normal_rmdup_bam } } # 双样本模式,normal有定义 if (name!=select_first([normal, tumor])) { call mutation_calling_tissue_control as tumor_mutation_calling_tissue_control { input: name=name, output_dir=output_dir, ref=ref, bed=bed, tumor_rmdup_bam=tumor_rmdup_bam, normal_rmdup_bam=normal_rmdup_bam } } } } if (name==select_first([normal, 'None'])) { if (umi) { call mutation_calling_tissue as normal_mutation_calling_tissue { input: name=name, output_dir=output_dir, ref=ref, bed=bed, rmdup_bam=normal_rmdup_bam } } } } } output { String somatic_vcf = "${output_dir}/mutation/${tumor}.snp.indel.vcf" String somatic_nc_vcf = "${output_dir}/mutation/${normal}.snp.indel.vcf" } }