pipeline/wdl/call_mutation.wdl

438 lines
16 KiB
Plaintext
Raw Blame History

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

# Single-sample UMI variant calling.
#
# Runs VarDict twice (a "1r" pass on the de-duplicated BAM and a "2r" pass on
# a BAM produced by bam_fetch.py), merges the two call sets, post-processes the
# merged VCF with correct_genome_3rule.py, and finally derives a somatic and a
# germline VCF under ${output_dir}/mutation.
#
# Inputs:
#   name        sample name, used for -N and as the file-name prefix
#   output_dir  analysis root; results are written to ${output_dir}/mutation
#   rmdup_bam   de-duplicated BAM used for the 1r VarDict pass
#   ref         reference passed to VarDict (-G) and correct_genome_3rule.py
#   bed         target regions passed to VarDict
# Outputs:
#   somatic_vcf / germline_vcf  paths (as String) of the final VCFs
task mutation_calling_umi {
    String name
    String output_dir
    String rmdup_bam
    String ref
    String bed

    command <<<
        [ -d ${output_dir}/mutation ] || mkdir ${output_dir}/mutation

        # --- 1r call ---------------------------------------------------------
        # This pass keeps UMI families that contain only a single read, because
        # discarding them would shrink the data set considerably.
        # -r 3 : require 3 supporting reads of this kind.
        # -f   : allele-frequency threshold; frequencies obtained from the 2r
        #        pass are considered more trustworthy than those from 1r.
        java -jar /dataseq/jmdna/software/VarDict-1.8.3/lib/VarDict-1.8.3.jar \
            -G ${ref} -f 0.001 -N ${name} -b ${rmdup_bam} \
            -UN \
            -Q 20 \
            -m 3 \
            -r 3 \
            -th 10 \
            -z 1 \
            -c 1 -S 2 -E 3 -g 4 \
            ${bed} \
            | /dataseq/jmdna/software/VarDict-1.8.3/bin/teststrandbias.R \
            | /dataseq/jmdna/software/VarDict-1.8.3/bin/var2vcf_valid.pl -N ${name} -E -f 0.001 \
            > ${output_dir}/mutation/${name}.1r.snp.indel.vcf

        # --- extract sequences corrected from >= 2 reads -----------------------
        # NOTE(review): the source BAM is the fixed path under
        # <output_dir>/alignment, not the rmdup_bam input used above -- confirm
        # that both always refer to the same file.
        bam_fetch.py \
            ${output_dir}/alignment/${name}.rmdup.bam \
            ${output_dir}/alignment/${name}.2r.rmdup.bam
        samtools index ${output_dir}/alignment/${name}.2r.rmdup.bam

        # --- 2r call -----------------------------------------------------------
        # Ensures that mutations called in the 1r pass are backed by UMI
        # families that contain 2 reads.
        java -jar /dataseq/jmdna/software/VarDict-1.8.3/lib/VarDict-1.8.3.jar \
            -G ${ref} \
            -f 0.0001 \
            -N ${name}_2r \
            -b ${output_dir}/alignment/${name}.2r.rmdup.bam \
            -UN -Q 20 -m 3 -r 1 -th 10 -c 1 -S 2 -E 3 -g 4 ${bed} \
            | /dataseq/jmdna/software/VarDict-1.8.3/bin/teststrandbias.R \
            | /dataseq/jmdna/software/VarDict-1.8.3/bin/var2vcf_valid.pl -N ${name} -E -f 0.001 \
            > ${output_dir}/mutation/${name}.2r.snp.indel.vcf

        # --- merge -------------------------------------------------------------
        # Keep 1r mutations with AF > 0.01 and use the 2r calls to correct the
        # low-frequency region (AF < 0.01) of the 1r calls.
        correct_umi_1r_plus_2r.pl \
            ${output_dir}/mutation/${name}.1r.snp.indel.vcf \
            ${output_dir}/mutation/${name}.2r.snp.indel.vcf \
            ${output_dir}/mutation/${name}.snp.indel.raw.vcf

        correct_genome_3rule.py \
            ${output_dir}/mutation/${name}.snp.indel.raw.vcf \
            ${output_dir}/mutation/${name}.snp.indel.vcf \
            ${ref}

        # The somatic VCF is a plain copy of the corrected VCF; the germline VCF
        # is the subset selected by the AF expression below.
        cp ${output_dir}/mutation/${name}.snp.indel.vcf ${output_dir}/mutation/${name}.snp.indel.somatic.vcf
        python ~/project/pipeline/workflow/script/tools/vcf_filter.py \
            -i ${output_dir}/mutation/${name}.snp.indel.somatic.vcf \
            -e 'INFO/AF[0] > 0.1' \
            -o ${output_dir}/mutation/${name}.snp.indel.germline.vcf
    >>>

    output {
        String somatic_vcf = "${output_dir}/mutation/${name}.snp.indel.somatic.vcf"
        String germline_vcf = "${output_dir}/mutation/${name}.snp.indel.germline.vcf"
    }
}
# Single-sample tissue (non-UMI) variant calling.
#
# Runs VarDict once on the de-duplicated BAM, post-processes the raw VCF with
# correct_genome_3rule.py, copies the result as the somatic VCF and derives a
# germline VCF with vcf_filter.py.
#
# Inputs:
#   name        sample name, used for -N and as the file-name prefix
#   bed         target regions passed to VarDict
#   ref         reference passed to VarDict (-G) and correct_genome_3rule.py
#   output_dir  analysis root; results are written to ${output_dir}/mutation
#   rmdup_bam   de-duplicated BAM passed to VarDict (-b)
# Outputs:
#   somatic_vcf / germline_vcf  paths (as String) of the final VCFs
task mutation_calling_tissue {
    String name
    String bed
    String ref
    String output_dir
    String rmdup_bam

    command <<<
        # -p: succeed when the directory already exists and create missing parents.
        mkdir -p ${output_dir}/mutation

        # vardict
        java -jar /dataseq/jmdna/software/VarDict-1.8.3/lib/VarDict-1.8.3.jar \
            -G ${ref} \
            -f 0.01 \
            -N ${name} \
            -b ${rmdup_bam} \
            -UN \
            -Q 20 \
            -m 3 \
            -r 3 \
            -z 1 \
            -th 10 \
            -c 1 -S 2 -E 3 -g 4 ${bed} \
            | /dataseq/jmdna/software/VarDict-1.8.3/bin/teststrandbias.R \
            | /dataseq/jmdna/software/VarDict-1.8.3/bin/var2vcf_valid.pl -N ${name} -E -f 0.01 \
            > ${output_dir}/mutation/${name}.snp.indel.raw.vcf

        correct_genome_3rule.py \
            ${output_dir}/mutation/${name}.snp.indel.raw.vcf \
            ${output_dir}/mutation/${name}.snp.indel.vcf \
            ${ref}

        # The somatic VCF is a plain copy of the corrected VCF; the germline VCF
        # is the subset selected by the AF expression below.
        cp ${output_dir}/mutation/${name}.snp.indel.vcf ${output_dir}/mutation/${name}.snp.indel.somatic.vcf
        python ~/project/pipeline/workflow/script/tools/vcf_filter.py \
            -i ${output_dir}/mutation/${name}.snp.indel.somatic.vcf \
            -e 'INFO/AF[0] > 0.1' \
            -o ${output_dir}/mutation/${name}.snp.indel.germline.vcf
    >>>

    output {
        String somatic_vcf = "${output_dir}/mutation/${name}.snp.indel.somatic.vcf"
        # Must match the -o path written by vcf_filter.py above. This previously
        # ended in ".germline_vcf.vcf", a file the command never creates.
        String germline_vcf = "${output_dir}/mutation/${name}.snp.indel.germline.vcf"
    }
}
# Paired tumor/normal tissue (non-UMI) variant calling.
#
# Runs VarDict in paired mode ("tumor|normal" passed to -b), converts the
# result with testsomatic.R and var2vcf_paired.pl, post-processes it with
# correct_genome_3rule.py, then splits the corrected VCF into germline and
# somatic VCFs by INFO/STATUS using vcf_filter.py.
#
# Inputs:
#   name              sample name, used for -N and as the file-name prefix
#   bed               target regions passed to VarDict
#   ref               reference passed to VarDict (-G) and correct_genome_3rule.py
#   output_dir        analysis root; results are written to ${output_dir}/mutation
#   tumor_rmdup_bam   de-duplicated tumor BAM
#   normal_rmdup_bam  de-duplicated normal BAM
# Outputs:
#   somatic_vcf / germline_vcf  paths (as String) of the final VCFs
task mutation_calling_tissue_control {
    String name
    String bed
    String ref
    String output_dir
    String tumor_rmdup_bam
    String normal_rmdup_bam

    command <<<
        [ -d ${output_dir}/mutation ] || mkdir ${output_dir}/mutation

        # Paired VarDict call.
        java -jar /dataseq/jmdna/software/VarDict-1.8.3/lib/VarDict-1.8.3.jar \
            -G ${ref} -f 0.01 -N ${name} \
            -b "${tumor_rmdup_bam}|${normal_rmdup_bam}" \
            -UN -Q 20 -m 3 -r 3 -th 20 \
            -z 1 -c 1 -S 2 -E 3 -g 4 \
            ${bed} \
            | /dataseq/jmdna/software/VarDict-1.8.3/bin/testsomatic.R \
            | /dataseq/jmdna/software/VarDict-1.8.3/bin/var2vcf_paired.pl -N ${name} -f 0.01 \
            > ${output_dir}/mutation/${name}.snp.indel.raw.vcf

        correct_genome_3rule.py \
            ${output_dir}/mutation/${name}.snp.indel.raw.vcf \
            ${output_dir}/mutation/${name}.snp.indel.vcf \
            ${ref}

        # Germline subset: records whose STATUS is "Germline".
        python ~/project/pipeline/workflow/script/tools/vcf_filter.py \
            -i ${output_dir}/mutation/${name}.snp.indel.vcf \
            -o ${output_dir}/mutation/${name}.snp.indel.germline.vcf \
            -e 'INFO/STATUS="Germline"'

        # Somatic subset: "StrongSomatic", or "LikelySomatic" where the first
        # sample's AF exceeds three times the second sample's AF.
        python ~/project/pipeline/workflow/script/tools/vcf_filter.py \
            -i ${output_dir}/mutation/${name}.snp.indel.vcf \
            -o ${output_dir}/mutation/${name}.snp.indel.somatic.vcf \
            -e 'INFO/STATUS="StrongSomatic" | ( INFO/STATUS="LikelySomatic" && FORMAT/AF[0] > 3*FORMAT/AF[1] )'
    >>>

    output {
        String somatic_vcf = "${output_dir}/mutation/${name}.snp.indel.somatic.vcf"
        String germline_vcf = "${output_dir}/mutation/${name}.snp.indel.germline.vcf"
    }
}
# Paired tumor/normal UMI variant calling.
#
# Calls the normal BAM once with VarDict, calls the tumor BAM with the same
# 1r + 2r scheme as the single-sample UMI task, merges the tumor call sets,
# removes sites also found in the normal calls, and post-processes both the
# germline (normal) and somatic (tumor minus normal) VCFs with
# correct_genome_3rule.py.
#
# Inputs:
#   name              sample name, used for -N and as the file-name prefix
#   bed               target regions passed to VarDict
#   ref               reference passed to VarDict (-G) and correct_genome_3rule.py
#   output_dir        analysis root; results are written to ${output_dir}/mutation
#   tumor_rmdup_bam   de-duplicated tumor BAM used for the 1r pass
#   normal_rmdup_bam  de-duplicated normal BAM
# Outputs:
#   somatic_vcf / germline_vcf  paths (as String) of the final VCFs
task mutation_calling_umi_control {
    String name
    String bed
    String ref
    String output_dir
    String tumor_rmdup_bam
    String normal_rmdup_bam

    command <<<
        [ -d ${output_dir}/mutation ] || mkdir ${output_dir}/mutation

        # --- control (normal) sample -----------------------------------------
        java -jar /dataseq/jmdna/software/VarDict-1.8.3/lib/VarDict-1.8.3.jar \
            -G ${ref} -f 0.01 -N ${name} -b ${normal_rmdup_bam} \
            -UN -Q 20 -m 3 -r 3 -th 10 \
            -c 1 -S 2 -E 3 -g 4 ${bed} \
            | /dataseq/jmdna/software/VarDict-1.8.3/bin/teststrandbias.R \
            | /dataseq/jmdna/software/VarDict-1.8.3/bin/var2vcf_valid.pl -N ${name} -E -f 0.01 \
            > ${output_dir}/mutation/${name}.snp.indel.raw_germline.vcf

        # --- test (tumor) sample, 1r call --------------------------------------
        java -jar /dataseq/jmdna/software/VarDict-1.8.3/lib/VarDict-1.8.3.jar \
            -G ${ref} -f 0.001 -N ${name} -b ${tumor_rmdup_bam} \
            -UN -Q 20 -m 3 -r 3 -th 10 -c 1 -S 2 -E 3 -g 4 ${bed} \
            | /dataseq/jmdna/software/VarDict-1.8.3/bin/teststrandbias.R \
            | /dataseq/jmdna/software/VarDict-1.8.3/bin/var2vcf_valid.pl -N ${name} -E -f 0.001 \
            > ${output_dir}/mutation/${name}.1r.snp.indel.vcf

        # --- extract sequences corrected from >= 2 reads -----------------------
        # NOTE(review): the source BAM is the fixed path under
        # <output_dir>/alignment, not the tumor_rmdup_bam input -- confirm that
        # both always refer to the same file. The single-sample UMI task calls
        # this helper bam_fetch.py; confirm which name is installed.
        func_fetch_bam.py \
            ${output_dir}/alignment/${name}.rmdup.bam \
            ${output_dir}/alignment/${name}.2r.rmdup.bam
        samtools index ${output_dir}/alignment/${name}.2r.rmdup.bam

        # --- 2r call -----------------------------------------------------------
        # Ensures that mutations called in the 1r pass are backed by UMI
        # families that contain 2 reads.
        java -jar /dataseq/jmdna/software/VarDict-1.8.3/lib/VarDict-1.8.3.jar \
            -G ${ref} \
            -f 0.0001 \
            -N ${name}_2r \
            -b ${output_dir}/alignment/${name}.2r.rmdup.bam \
            -UN -Q 20 -m 3 -r 1 -th 10 -c 1 -S 2 -E 3 -g 4 ${bed} \
            | /dataseq/jmdna/software/VarDict-1.8.3/bin/teststrandbias.R \
            | /dataseq/jmdna/software/VarDict-1.8.3/bin/var2vcf_valid.pl -N ${name} -E -f 0.001 \
            > ${output_dir}/mutation/${name}.2r.snp.indel.vcf

        # --- merge -------------------------------------------------------------
        # Keep 1r mutations with AF > 0.01 and use the 2r calls to correct the
        # low-frequency region (AF < 0.01) of the 1r calls.
        filter_snpindel_umi_1r_plus_2r.pl \
            ${output_dir}/mutation/${name}.1r.snp.indel.vcf \
            ${output_dir}/mutation/${name}.2r.snp.indel.vcf \
            ${output_dir}/mutation/${name}.snp.indel.pre_raw.vcf

        # --- remove variant sites that are present in the normal sample --------
        # The intermediate name "raw_somaitc" is misspelled but is used
        # consistently by the two commands that touch it.
        correct_umi_subnormal.pl \
            ${output_dir}/mutation/${name}.snp.indel.pre_raw.vcf \
            ${output_dir}/mutation/${name}.snp.indel.raw_germline.vcf \
            ${output_dir}/mutation/${name}.snp.indel.raw_somaitc.vcf

        correct_genome_3rule.py \
            ${output_dir}/mutation/${name}.snp.indel.raw_germline.vcf \
            ${output_dir}/mutation/${name}.snp.indel.germline.vcf \
            ${ref}
        correct_genome_3rule.py \
            ${output_dir}/mutation/${name}.snp.indel.raw_somaitc.vcf \
            ${output_dir}/mutation/${name}.snp.indel.somatic.vcf \
            ${ref}
    >>>

    output {
        String somatic_vcf = "${output_dir}/mutation/${name}.snp.indel.somatic.vcf"
        String germline_vcf = "${output_dir}/mutation/${name}.snp.indel.germline.vcf"
    }
}
# Annotate a VCF with ANNOVAR's table_annovar.pl against the hg19 databases in
# /dataseq/jmdna/software/annovar/humandb/.
#
# Inputs:
#   prefix      output file-name prefix (passed to -outfile under the mutation dir)
#   output_dir  analysis root; results are written to ${output_dir}/mutation
#   ref         declared but not referenced by the command
#   vcf         VCF to annotate
# Outputs:
#   anno        path (as String) of the "<prefix>.hg19_multianno.txt" table
task annovar {
    String prefix
    String output_dir
    String ref
    String vcf

    command <<<
        [ -d ${output_dir}/mutation ] || mkdir ${output_dir}/mutation

        # -protocol, -argument and -operation each carry 11 comma-separated
        # entries, one per database; only the first (refGene) has an argument.
        table_annovar.pl ${vcf} /dataseq/jmdna/software/annovar/humandb/ \
            -buildver hg19 \
            -nastring . \
            -vcfinput \
            -remove \
            -otherinfo \
            -protocol refGene,avsnp150,cosmic91,clinvar_20220320,1000g2015aug_all,1000g2015aug_eas,esp6500siv2_all,exac03nontcga,gnomad_genome,dbnsfp35c,cytoBand \
            -argument '-splicing_threshold 2 -hgvs',,,,,,,,,, \
            -intronhgvs 50 \
            -operation g,f,f,f,f,f,f,f,f,f,r \
            -outfile ${output_dir}/mutation/${prefix} \
            -dot2underline
    >>>

    output {
        String anno = "${output_dir}/mutation/${prefix}.hg19_multianno.txt"
    }
}
# Filter an annotated UMI call set.
#
# Step 1 runs filter_snpindel in mode "c" on the annotation table, writing a
# preliminary somatic table, a preliminary germline table and a tag table.
# Step 2 runs filter_snpindel_umi_correct_overlap_reads.py on the preliminary
# somatic table together with the tumor BAM to produce the final somatic table.
#
# Inputs:
#   name             sample name, used as the file-name prefix
#   anno             annotation table to filter
#   project          project identifier passed to filter_snpindel
#   output_dir       analysis root; results are written to ${output_dir}/mutation
#   tumor_rmdup_bam  de-duplicated tumor BAM used by the overlap-read correction
# Outputs:
#   snvindel_filtered  path (as String) of the final somatic table
#   germline_filtered  path (as String) of the germline table from step 1
task filter_umi {
    String name
    String anno
    String project
    String output_dir
    String tumor_rmdup_bam

    command <<<
        # -p: succeed when the directory already exists and create missing parents.
        mkdir -p ${output_dir}/mutation

        # NOTE(review): filter_tissue invokes this tool as "filter_snpindel.pl";
        # confirm that an executable named "filter_snpindel" is on PATH.
        filter_snpindel \
            ${anno} \
            ${project} \
            c \
            ${output_dir}/mutation/${name}.snp.indel.somatic.hg19_multianno.filtered.pre.txt \
            ${output_dir}/mutation/${name}.snp.indel.germline.hg19_multianno.filtered.pre.txt \
            ${output_dir}/mutation/${name}.snp.indel.hg19_multianno.tag.txt

        # The input here must be the somatic table written by step 1. It used to
        # be spelled "hg19_multianno_filtered_pre.txt", which step 1 never creates.
        filter_snpindel_umi_correct_overlap_reads.py \
            ${output_dir}/mutation/${name}.snp.indel.somatic.hg19_multianno.filtered.pre.txt \
            ${tumor_rmdup_bam} \
            ${output_dir}/mutation/${name}.snp.indel.somatic.hg19_multianno_filtered.txt
    >>>

    output {
        # Both outputs now name files the command really writes. The previous
        # "Somatic.anno" / "Germline.anno" paths were never produced.
        String snvindel_filtered = "${output_dir}/mutation/${name}.snp.indel.somatic.hg19_multianno_filtered.txt"
        # Only step 1 writes a germline table, so that file is the germline result.
        String germline_filtered = "${output_dir}/mutation/${name}.snp.indel.germline.hg19_multianno.filtered.pre.txt"
    }
}
# Filter an annotated tissue (non-UMI) call set.
#
# Runs filter_snpindel.pl in mode "t" on the annotation table, writing a
# somatic table, a germline table and a tag table under ${output_dir}/mutation.
#
# Inputs:
#   name             sample name, used as the file-name prefix
#   anno             annotation table to filter
#   project          project identifier passed to filter_snpindel.pl
#   output_dir       analysis root; results are written to ${output_dir}/mutation
#   tumor_rmdup_bam  declared but not referenced by the command
# Outputs:
#   snvindel_filtered  path (as String) of the filtered somatic table
#   germline_filtered  path (as String) of the filtered germline table
task filter_tissue {
    String name
    String anno
    String project
    String output_dir
    String tumor_rmdup_bam

    command <<<
        # -p: succeed when the directory already exists and create missing parents.
        mkdir -p ${output_dir}/mutation

        filter_snpindel.pl \
            ${anno} \
            ${project} \
            t \
            ${output_dir}/mutation/${name}.snp.indel.somatic.hg19_multianno.filtered.txt \
            ${output_dir}/mutation/${name}.snp.indel.germline.hg19_multianno.filtered.txt \
            ${output_dir}/mutation/${name}.snp.indel.hg19_multianno.tag.txt
    >>>

    output {
        # Both outputs now name files the command really writes. The previous
        # "Somatic.anno" / "Germline.anno" paths were never produced.
        String snvindel_filtered = "${output_dir}/mutation/${name}.snp.indel.somatic.hg19_multianno.filtered.txt"
        String germline_filtered = "${output_dir}/mutation/${name}.snp.indel.germline.hg19_multianno.filtered.txt"
    }
}
# Variant-calling workflow.
#
# Chooses one of four calling tasks from two switches -- whether a normal
# sample is supplied and whether the library is UMI-based -- and annotates the
# resulting somatic and germline VCFs with ANNOVAR.
#
# Inputs:
#   tumor             tumor sample name (used as file-name prefix)
#   tumor_rmdup_bam   de-duplicated tumor BAM
#   normal            optional normal sample name; its presence selects paired mode
#   normal_rmdup_bam  optional de-duplicated normal BAM (used in paired mode)
#   umi               true for UMI libraries, false for tissue libraries
#   output_dir        analysis root directory
#   ref               reference passed to the calling tasks
#   bed               target regions passed to the calling tasks
# Outputs:
#   somatic_vcf / germline_vcf  paths (as String) built from output_dir and tumor
workflow call_mutation {
    String tumor
    String tumor_rmdup_bam
    String? normal
    String? normal_rmdup_bam
    Boolean umi
    String output_dir
    String ref
    String bed

    # Intended pipeline: mutation_calling => annovar => filter.
    # NOTE(review): no filter task is called anywhere in this workflow body.

    # ---- single sample (no normal) ------------------------------------------
    if (!defined(normal)) {
        if (umi) {
            call mutation_calling_umi {
                input:
                    name = tumor,
                    output_dir = output_dir,
                    ref = ref,
                    bed = bed,
                    rmdup_bam = tumor_rmdup_bam
            }

            call annovar as anno_somatic_umi {
                input:
                    prefix = "${tumor}.snp.indel.somatic",
                    output_dir = output_dir,
                    ref = ref,
                    vcf = mutation_calling_umi.somatic_vcf
            }

            call annovar as anno_germline_umi {
                input:
                    prefix = "${tumor}.snp.indel.germline",
                    output_dir = output_dir,
                    ref = ref,
                    vcf = mutation_calling_umi.germline_vcf
            }
        }

        if (!umi) {
            call mutation_calling_tissue {
                input:
                    name = tumor,
                    output_dir = output_dir,
                    ref = ref,
                    bed = bed,
                    rmdup_bam = tumor_rmdup_bam
            }

            call annovar as anno_somatic_tissue {
                input:
                    prefix = "${tumor}.snp.indel.somatic",
                    output_dir = output_dir,
                    ref = ref,
                    vcf = mutation_calling_tissue.somatic_vcf
            }

            call annovar as anno_germline_tissue {
                input:
                    prefix = "${tumor}.snp.indel.germline",
                    output_dir = output_dir,
                    ref = ref,
                    vcf = mutation_calling_tissue.germline_vcf
            }
        }
    }

    # ---- paired samples (tumor + normal) ------------------------------------
    if (defined(normal)) {
        if (umi) {
            call mutation_calling_umi_control {
                input:
                    name = tumor,
                    output_dir = output_dir,
                    ref = ref,
                    bed = bed,
                    tumor_rmdup_bam = tumor_rmdup_bam,
                    normal_rmdup_bam = normal_rmdup_bam
            }

            call annovar as anno_somatic_umi_control {
                input:
                    prefix = "${tumor}.snp.indel.somatic",
                    output_dir = output_dir,
                    ref = ref,
                    vcf = mutation_calling_umi_control.somatic_vcf
            }

            call annovar as anno_germline_umi_control {
                input:
                    prefix = "${tumor}.snp.indel.germline",
                    output_dir = output_dir,
                    ref = ref,
                    vcf = mutation_calling_umi_control.germline_vcf
            }
        }

        if (!umi) {
            call mutation_calling_tissue_control {
                input:
                    name = tumor,
                    output_dir = output_dir,
                    ref = ref,
                    bed = bed,
                    tumor_rmdup_bam = tumor_rmdup_bam,
                    normal_rmdup_bam = normal_rmdup_bam
            }

            call annovar as anno_somatic_tissue_control {
                input:
                    prefix = "${tumor}.snp.indel.somatic",
                    output_dir = output_dir,
                    ref = ref,
                    vcf = mutation_calling_tissue_control.somatic_vcf
            }

            call annovar as anno_germline_tissue_control {
                input:
                    prefix = "${tumor}.snp.indel.germline",
                    output_dir = output_dir,
                    ref = ref,
                    vcf = mutation_calling_tissue_control.germline_vcf
            }
        }
    }

    # The result paths are rebuilt from the inputs instead of being read from a
    # task output, since every calling task sits inside a conditional block.
    output {
        String somatic_vcf = "${output_dir}/mutation/${tumor}.snp.indel.somatic.vcf"
        String germline_vcf = "${output_dir}/mutation/${tumor}.snp.indel.germline.vcf"
    }
}