pipeline/wdl/call_mutation.wdl

215 lines
8.1 KiB
Plaintext
Raw Normal View History

2023-09-27 10:47:03 +08:00
task mutation_calling_umi {
String name
String output_dir
String rmdup_bam
String ref
String bed
command <<<
if [ ! -d ${output_dir}/mutation ];then
mkdir ${output_dir}/mutation
fi
#1条call
# 这个情况是reads数目只有1但是如果去掉了这个reads数导致数据量减少很多
# -r 3 是指有3条这样样的reads支撑
# -f 是指频率 以2条方式的call出来的变异频率可以比1条的方式更可信
java -jar /dataseq/jmdna/software/VarDict-1.8.3/lib/VarDict-1.8.3.jar \
-G ${ref} \
-f 0.001 \
-N ${name} \
-b ${rmdup_bam} \
-UN -Q 20 -m 3 -r 3 -th 10 -c 1 -S 2 -E 3 -g 4 ${bed} \
| /dataseq/jmdna/software/VarDict-1.8.3/bin/teststrandbias.R \
| /dataseq/jmdna/software/VarDict-1.8.3/bin/var2vcf_valid.pl \
-N ${name} -E -f 0.001 > ${output_dir}/mutation/${name}.1r.snp.indel.vcf
#提取>=2条矫正的序列
python3 /home/zhangchao/project/pipeline/control/script/fetch_bam.py ${output_dir}/alignment/${name}.rmdup.bam ${output_dir}/alignment/${name}.2r.rmdup.bam
samtools index ${output_dir}/alignment/${name}.2r.rmdup.bam
# 保证 1r call mut umi family 里面有2条reads
#2条矫正的call
java -jar /dataseq/jmdna/software/VarDict-1.8.3/lib/VarDict-1.8.3.jar -G ${ref} \
-f 0.0001 -N ${name}_2r -b ${output_dir}/alignment/${name}.2r.rmdup.bam \
-UN -Q 20 -m 3 -r 1 -th 10 -c 1 -S 2 -E 3 -g 4 ${bed} | /dataseq/jmdna/software/VarDict-1.8.3/bin/teststrandbias.R \
| /dataseq/jmdna/software/VarDict-1.8.3/bin/var2vcf_valid.pl -N ${name} -E -f 0.001 >${output_dir}/mutation/${name}.2r.snp.indel.vcf
#merge突变以1条方式call的>0.01的突变+两条方式的对一条方式的低频区域AF<0.01)进行矫正。
perl /home/zhangchao/project/pipeline/control/script/1r_plus_2r.pl \
${output_dir}/mutation/${name}.1r.snp.indel.vcf \
${output_dir}/mutation/${name}.2r.snp.indel.vcf \
${output_dir}/mutation/${name}.snp.indel.vcf
table_annovar.pl \
${output_dir}/mutation/${name}.snp.indel.vcf \
/dataseq/jmdna/software/annovar/humandb/ \
-buildver hg19 -nastring . -vcfinput -remove -otherinfo \
-protocol refGene,avsnp150,cosmic91,clinvar_20220320,1000g2015aug_all,1000g2015aug_eas,esp6500siv2_all,exac03nontcga,gnomad_genome,dbnsfp35c,cytoBand \
-argument '-splicing_threshold 2 -hgvs',,,,,,,,,, \
--intronhgvs 50 \
-operation g,f,f,f,f,f,f,f,f,f,r \
--outfile ${output_dir}/mutation/${name}.snp.indel.anno
>>>
output {
String vcf = "${output_dir}/mutation/${name}.filter.flag.snp.indel.vcf"
}
}
task mutation_calling_tissue {
String name
String bed
String ref
String output_dir
String rmdup_bam
command <<<
if [ ! -d ${output_dir}/mutation ];then
mkdir ${output_dir}/mutation
fi
# vardict
java -jar /dataseq/jmdna/software/VarDict-1.8.3/lib/VarDict-1.8.3.jar \
-G ${ref} \
-f 0.01 \
-N ${name} \
-b ${rmdup_bam} \
-UN \
-Q 20 \
-m 3 \
-r 3 \
-th 10 \
-c 1 -S 2 -E 3 -g 4 ${bed} |/dataseq/jmdna/software/VarDict-1.8.3/bin/teststrandbias.R \
|/dataseq/jmdna/software/VarDict-1.8.3/bin/var2vcf_valid.pl -N ${name} -E -f 0.01 >${output_dir}/mutation/${name}.snp.indel.vcf
table_annovar.pl \
${output_dir}/mutation/${name}.snp.indel.vcf \
/dataseq/jmdna/software/annovar/humandb/ \
-buildver hg19 -nastring . -vcfinput -remove -otherinfo \
-protocol refGene,avsnp150,cosmic91,clinvar_20220320,1000g2015aug_all,1000g2015aug_eas,esp6500siv2_all,exac03nontcga,gnomad_genome,dbnsfp35c \
-argument '-splicing_threshold 2 -hgvs',,,,,,,,, \
-operation g,f,f,f,f,f,f,f,f,f \
--intronhgvs 50 \
--outfile ${output_dir}/mutation/${name}.snp.indel.anno
>>>
output {
String vcf = "${output_dir}/mutation/${name}.snp.indel.vcf"
}
}
task mutation_calling_tissue_control {
String name
String bed
String ref
String output_dir
String tumor_rmdup_bam
String normal_rmdup_bam
command <<<
if [ ! -d ${output_dir}/mutation ];then
mkdir ${output_dir}/mutation
fi
java -jar /dataseq/jmdna/software/VarDict-1.8.3/lib/VarDict-1.8.3.jar \
-G ${ref} \
-f 0.01 \
-N ${name} \
-b "${tumor_rmdup_bam}|${normal_rmdup_bam}" \
-UN -Q 20 -m 3 -r 3 -th 20 -c 1 -S 2 -E 3 -g 4 ${bed} | /dataseq/jmdna/software/VarDict-1.8.3/bin/testsomatic.R \
| /dataseq/jmdna/software/VarDict-1.8.3/bin/var2vcf_paired.pl -N ${name} -f 0.01 > ${output_dir}/mutation/${name}.snp.indel.vcf
table_annovar.pl \
${output_dir}/mutation/${name}.snp.indel.vcf \
/dataseq/jmdna/software/annovar/humandb/ \
-buildver hg19 -nastring . -vcfinput -remove -otherinfo \
-protocol refGene,avsnp150,cosmic91,clinvar_20220320,1000g2015aug_all,1000g2015aug_eas,esp6500siv2_all,exac03nontcga,gnomad_genome,dbnsfp35c \
-argument '-splicing_threshold 2 -hgvs',,,,,,,,, \
-operation g,f,f,f,f,f,f,f,f,f \
--intronhgvs 50 \
--outfile ${output_dir}/mutation/${name}.snp.indel.anno
>>>
output {
String vcf = "${output_dir}/mutation/${name}.snp.indel.vcf"
}
}
workflow call_mutation {
String tumor
String tumor_rmdup_bam
String? normal
String? normal_rmdup_bam
Boolean umi
String output_dir
String ref
String bed
scatter(name in [tumor, normal]) {
if (defined(name)) {
if (name==tumor) {
if (umi) {
call mutation_calling_umi as tumor_mutation_calling_umi {
input:
name=name,
output_dir=output_dir,
ref=ref,
bed=bed,
rmdup_bam=tumor_rmdup_bam
}
}
if (!umi) {
# 单样本模式normal没有定义
if (name==select_first([normal, tumor])) {
call mutation_calling_tissue as tumor_mutation_calling_tissue {
input:
name=name,
output_dir=output_dir,
ref=ref,
bed=bed,
rmdup_bam=normal_rmdup_bam
}
}
# 双样本模式normal有定义
if (name!=select_first([normal, tumor])) {
call mutation_calling_tissue_control as tumor_mutation_calling_tissue_control {
input:
name=name,
output_dir=output_dir,
ref=ref,
bed=bed,
tumor_rmdup_bam=tumor_rmdup_bam,
normal_rmdup_bam=normal_rmdup_bam
}
}
}
}
if (name==select_first([normal, 'None'])) {
if (umi) {
call mutation_calling_tissue as normal_mutation_calling_tissue {
input:
name=name,
output_dir=output_dir,
ref=ref,
bed=bed,
rmdup_bam=normal_rmdup_bam
}
}
}
}
}
output {
String somatic_vcf = "${output_dir}/mutation/${tumor}.snp.indel.vcf"
String somatic_nc_vcf = "${output_dir}/mutation/${normal}.snp.indel.vcf"
}
}