pipeline/wdl/call_mutation.wdl

215 lines
8.1 KiB
Plaintext
Raw Blame History

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

task mutation_calling_umi {
String name
String output_dir
String rmdup_bam
String ref
String bed
command <<<
if [ ! -d ${output_dir}/mutation ];then
mkdir ${output_dir}/mutation
fi
#1条call
# 这个情况是reads数目只有1但是如果去掉了这个reads数导致数据量减少很多
# -r 3 是指有3条这样样的reads支撑
# -f 是指频率 以2条方式的call出来的变异频率可以比1条的方式更可信
java -jar /dataseq/jmdna/software/VarDict-1.8.3/lib/VarDict-1.8.3.jar \
-G ${ref} \
-f 0.001 \
-N ${name} \
-b ${rmdup_bam} \
-UN -Q 20 -m 3 -r 3 -th 10 -c 1 -S 2 -E 3 -g 4 ${bed} \
| /dataseq/jmdna/software/VarDict-1.8.3/bin/teststrandbias.R \
| /dataseq/jmdna/software/VarDict-1.8.3/bin/var2vcf_valid.pl \
-N ${name} -E -f 0.001 > ${output_dir}/mutation/${name}.1r.snp.indel.vcf
#提取>=2条矫正的序列
python3 /home/zhangchao/project/pipeline/control/script/fetch_bam.py ${output_dir}/alignment/${name}.rmdup.bam ${output_dir}/alignment/${name}.2r.rmdup.bam
samtools index ${output_dir}/alignment/${name}.2r.rmdup.bam
# 保证 1r call mut umi family 里面有2条reads
#2条矫正的call
java -jar /dataseq/jmdna/software/VarDict-1.8.3/lib/VarDict-1.8.3.jar -G ${ref} \
-f 0.0001 -N ${name}_2r -b ${output_dir}/alignment/${name}.2r.rmdup.bam \
-UN -Q 20 -m 3 -r 1 -th 10 -c 1 -S 2 -E 3 -g 4 ${bed} | /dataseq/jmdna/software/VarDict-1.8.3/bin/teststrandbias.R \
| /dataseq/jmdna/software/VarDict-1.8.3/bin/var2vcf_valid.pl -N ${name} -E -f 0.001 >${output_dir}/mutation/${name}.2r.snp.indel.vcf
#merge突变以1条方式call的>0.01的突变+两条方式的对一条方式的低频区域AF<0.01)进行矫正。
perl /home/zhangchao/project/pipeline/control/script/1r_plus_2r.pl \
${output_dir}/mutation/${name}.1r.snp.indel.vcf \
${output_dir}/mutation/${name}.2r.snp.indel.vcf \
${output_dir}/mutation/${name}.snp.indel.vcf
table_annovar.pl \
${output_dir}/mutation/${name}.snp.indel.vcf \
/dataseq/jmdna/software/annovar/humandb/ \
-buildver hg19 -nastring . -vcfinput -remove -otherinfo \
-protocol refGene,avsnp150,cosmic91,clinvar_20220320,1000g2015aug_all,1000g2015aug_eas,esp6500siv2_all,exac03nontcga,gnomad_genome,dbnsfp35c,cytoBand \
-argument '-splicing_threshold 2 -hgvs',,,,,,,,,, \
--intronhgvs 50 \
-operation g,f,f,f,f,f,f,f,f,f,r \
--outfile ${output_dir}/mutation/${name}.snp.indel.anno
>>>
output {
String vcf = "${output_dir}/mutation/${name}.filter.flag.snp.indel.vcf"
}
}
task mutation_calling_tissue {
String name
String bed
String ref
String output_dir
String rmdup_bam
command <<<
if [ ! -d ${output_dir}/mutation ];then
mkdir ${output_dir}/mutation
fi
# vardict
java -jar /dataseq/jmdna/software/VarDict-1.8.3/lib/VarDict-1.8.3.jar \
-G ${ref} \
-f 0.01 \
-N ${name} \
-b ${rmdup_bam} \
-UN \
-Q 20 \
-m 3 \
-r 3 \
-th 10 \
-c 1 -S 2 -E 3 -g 4 ${bed} |/dataseq/jmdna/software/VarDict-1.8.3/bin/teststrandbias.R \
|/dataseq/jmdna/software/VarDict-1.8.3/bin/var2vcf_valid.pl -N ${name} -E -f 0.01 >${output_dir}/mutation/${name}.snp.indel.vcf
table_annovar.pl \
${output_dir}/mutation/${name}.snp.indel.vcf \
/dataseq/jmdna/software/annovar/humandb/ \
-buildver hg19 -nastring . -vcfinput -remove -otherinfo \
-protocol refGene,avsnp150,cosmic91,clinvar_20220320,1000g2015aug_all,1000g2015aug_eas,esp6500siv2_all,exac03nontcga,gnomad_genome,dbnsfp35c \
-argument '-splicing_threshold 2 -hgvs',,,,,,,,, \
-operation g,f,f,f,f,f,f,f,f,f \
--intronhgvs 50 \
--outfile ${output_dir}/mutation/${name}.snp.indel.anno
>>>
output {
String vcf = "${output_dir}/mutation/${name}.snp.indel.vcf"
}
}
task mutation_calling_tissue_control {
String name
String bed
String ref
String output_dir
String tumor_rmdup_bam
String normal_rmdup_bam
command <<<
if [ ! -d ${output_dir}/mutation ];then
mkdir ${output_dir}/mutation
fi
java -jar /dataseq/jmdna/software/VarDict-1.8.3/lib/VarDict-1.8.3.jar \
-G ${ref} \
-f 0.01 \
-N ${name} \
-b "${tumor_rmdup_bam}|${normal_rmdup_bam}" \
-UN -Q 20 -m 3 -r 3 -th 20 -c 1 -S 2 -E 3 -g 4 ${bed} | /dataseq/jmdna/software/VarDict-1.8.3/bin/testsomatic.R \
| /dataseq/jmdna/software/VarDict-1.8.3/bin/var2vcf_paired.pl -N ${name} -f 0.01 > ${output_dir}/mutation/${name}.snp.indel.vcf
table_annovar.pl \
${output_dir}/mutation/${name}.snp.indel.vcf \
/dataseq/jmdna/software/annovar/humandb/ \
-buildver hg19 -nastring . -vcfinput -remove -otherinfo \
-protocol refGene,avsnp150,cosmic91,clinvar_20220320,1000g2015aug_all,1000g2015aug_eas,esp6500siv2_all,exac03nontcga,gnomad_genome,dbnsfp35c \
-argument '-splicing_threshold 2 -hgvs',,,,,,,,, \
-operation g,f,f,f,f,f,f,f,f,f \
--intronhgvs 50 \
--outfile ${output_dir}/mutation/${name}.snp.indel.anno
>>>
output {
String vcf = "${output_dir}/mutation/${name}.snp.indel.vcf"
}
}
workflow call_mutation {
String tumor
String tumor_rmdup_bam
String? normal
String? normal_rmdup_bam
Boolean umi
String output_dir
String ref
String bed
scatter(name in [tumor, normal]) {
if (defined(name)) {
if (name==tumor) {
if (umi) {
call mutation_calling_umi as tumor_mutation_calling_umi {
input:
name=name,
output_dir=output_dir,
ref=ref,
bed=bed,
rmdup_bam=tumor_rmdup_bam
}
}
if (!umi) {
# 单样本模式normal没有定义
if (name==select_first([normal, tumor])) {
call mutation_calling_tissue as tumor_mutation_calling_tissue {
input:
name=name,
output_dir=output_dir,
ref=ref,
bed=bed,
rmdup_bam=normal_rmdup_bam
}
}
# 双样本模式normal有定义
if (name!=select_first([normal, tumor])) {
call mutation_calling_tissue_control as tumor_mutation_calling_tissue_control {
input:
name=name,
output_dir=output_dir,
ref=ref,
bed=bed,
tumor_rmdup_bam=tumor_rmdup_bam,
normal_rmdup_bam=normal_rmdup_bam
}
}
}
}
if (name==select_first([normal, 'None'])) {
if (umi) {
call mutation_calling_tissue as normal_mutation_calling_tissue {
input:
name=name,
output_dir=output_dir,
ref=ref,
bed=bed,
rmdup_bam=normal_rmdup_bam
}
}
}
}
}
output {
String somatic_vcf = "${output_dir}/mutation/${tumor}.snp.indel.vcf"
String somatic_nc_vcf = "${output_dir}/mutation/${normal}.snp.indel.vcf"
}
}