pipeline/wdl/fusion.wdl

165 lines
4.6 KiB
Plaintext
Raw Normal View History

2023-10-10 11:09:16 +08:00
# Run Picard MarkDuplicates on a BAM and write the result under
# ${output_dir}/alignment.
#
# Inputs:
#   name        sample name; used as the prefix of every output file
#   sorted_bam  path of the input BAM (passed to Picard as I=)
#   output_dir  root output directory of the run
#   ref         reference sequence path (passed to Picard as R=)
#
# Output:
#   rmdup_bam   path (String, not File) of the BAM written by Picard
#
# Environment: $PICARD must expand to the Picard jar path.
#
# NOTE(review): REMOVE_DUPLICATES is not passed, so despite the task name the
# duplicates are presumably only flagged, not removed -- confirm this is intended.
task rmdup_picard {
    String name
    String sorted_bam
    String output_dir
    String ref

    command <<<
        # Abort on the first failing command.
        set -e

        # Picard writes into alignment/, so that directory must exist.
        # fusion/ is still created here because the original task did so.
        mkdir -p "${output_dir}/alignment" "${output_dir}/fusion"

        java -XX:+UseParallelGC -XX:ParallelGCThreads=8 -Xmx12G -jar $PICARD MarkDuplicates \
            I="${sorted_bam}" \
            O="${output_dir}/alignment/${name}.picard.rmdup.bam" \
            CREATE_INDEX=true \
            M="${output_dir}/alignment/${name}.picard.rmdup.metrics.txt" \
            R="${ref}"
    >>>

    output {
        String rmdup_bam = "${output_dir}/alignment/${name}.picard.rmdup.bam"
    }
}
# Call structural variants with lumpyexpress, filter and genotype them,
# annotate with ANNOVAR (refGene, hg19) and derive two result files.
#
# Inputs:
#   name                sample name; used as the prefix of every output file
#   rmdup_bam           path of the BAM to analyse
#   output_dir          root output directory; results go to ${output_dir}/fusion
#   ref                 reference sequence path (passed to svtyper as -T)
#   tumor_bamdst_depth  first argument of filter_fusion.pl
#                       (presumably a bamdst depth file -- TODO confirm)
#
# Outputs (path Strings, not Files):
#   vcf_txt  ${name}.fusion.hg19_multianno.filter.txt, written by filter_fusion.pl
#   longvcf  ${name}.longindel.hg19_multianno.vcf, written by the last vcf_filter.py
#
# Tools expected on PATH: samtools, lumpyexpress, vcf_filter.py, svtyper,
# table_annovar.pl, filter_fusion.pl. The extractSplitReads_BwaMem script and
# the ANNOVAR database directory are referenced by absolute path.
task fusion_calling {
    String name
    String rmdup_bam
    String output_dir
    String ref
    String tumor_bamdst_depth

    command <<<
        # Fail fast: without this a failure in an early step (or inside a pipe)
        # would be hidden by the exit status of the last command.
        set -e -o pipefail

        mkdir -p "${output_dir}/fusion"

        # Extract the discordant paired-end alignments.
        # -F 1294 drops reads with any of: proper pair (2), unmapped (4),
        # mate unmapped (8), secondary (256), duplicate (1024).
        samtools view -b -F 1294 "${rmdup_bam}" > "${output_dir}/fusion/${name}.discordants.bam"

        # Extract the split-read alignments.
        samtools view -h "${rmdup_bam}" \
            | /dataseq/jmdna/software/lumpy-sv/scripts/extractSplitReads_BwaMem -i stdin \
            | samtools view -Sb - \
            > "${output_dir}/fusion/${name}.splitters.bam"

        # Raw SV calls from the full BAM plus the two evidence BAMs.
        lumpyexpress \
            -B "${rmdup_bam}" \
            -S "${output_dir}/fusion/${name}.splitters.bam" \
            -D "${output_dir}/fusion/${name}.discordants.bam" \
            -o "${output_dir}/fusion/${name}.fusion.raw.vcf"

        # Keep calls passing the PE/SR expression, then genotype with svtyper.
        vcf_filter.py \
            -i "${output_dir}/fusion/${name}.fusion.raw.vcf" \
            -e 'INFO/PE[0] >1 && INFO/SR[0] > 4 ' \
            | svtyper -B "${rmdup_bam}" -T "${ref}" -o "${output_dir}/fusion/${name}.fusion.vcf"

        # Gene-based annotation; produces ${name}.fusion.hg19_multianno.vcf.
        table_annovar.pl \
            "${output_dir}/fusion/${name}.fusion.vcf" \
            /dataseq/jmdna/software/annovar/humandb/ \
            -buildver hg19 -nastring . -vcfinput -remove -otherinfo \
            -protocol refGene \
            -operation g \
            -outfile "${output_dir}/fusion/${name}.fusion"

        filter_fusion.pl "${tumor_bamdst_depth}" \
            "${output_dir}/fusion/${name}.fusion.hg19_multianno.vcf" \
            "${output_dir}/fusion/${name}.fusion.hg19_multianno.filter.txt"

        # Select DEL / DUP / INS records from the annotated VCF.
        vcf_filter.py \
            -e 'INFO/SVTYPE = "DEL" | INFO/SVTYPE = "DUP" | INFO/SVTYPE = "INS" ' \
            -i "${output_dir}/fusion/${name}.fusion.hg19_multianno.vcf" \
            -o "${output_dir}/fusion/${name}.longindel.hg19_multianno.vcf"
    >>>

    output {
        String vcf_txt = "${output_dir}/fusion/${name}.fusion.hg19_multianno.filter.txt"
        String longvcf = "${output_dir}/fusion/${name}.longindel.hg19_multianno.vcf"
    }
}
2023-11-01 10:09:29 +08:00
# Run target_therapy_fusion.pl on the results of fusion_calling.
#
# Inputs:
#   name        sample name; used to build the file paths below
#   output_dir  root output directory; files live under ${output_dir}/fusion
#   vcf_txt     1st script argument
#   longvcf     2nd script argument
#   project     5th script argument
#   cancer      6th script argument
#
# Output:
#   fusion      path String ${output_dir}/fusion/${name}.fusion.hg19_multianno.filter.txt
#               NOTE(review): this is the same path fusion_calling reports as
#               vcf_txt; whether the script rewrites it is not visible here.
task fusion_sum {
    String name
    String output_dir
    String vcf_txt
    String longvcf
    String project
    String cancer

    command <<<
        # Positional arguments, one per line:
        #   1-2  the two input files
        #   3    ${name}.fusion.hg19_multianno.filter path (no extension)
        #   4    ${name}.longindel.hg19_multianno.longindel.pos.txt path
        #   5-6  project and cancer
        target_therapy_fusion.pl \
            ${vcf_txt} \
            ${longvcf} \
            ${output_dir}/fusion/${name}.fusion.hg19_multianno.filter \
            ${output_dir}/fusion/${name}.longindel.hg19_multianno.longindel.pos.txt \
            ${project} \
            ${cancer}
    >>>

    output {
        String fusion = "${output_dir}/fusion/${name}.fusion.hg19_multianno.filter.txt"
    }
}
2023-10-10 11:09:16 +08:00
# Fusion / long-indel calling workflow.
#
# Inputs:
#   name                sample name
#   sorted_bam          BAM handed to rmdup_picard; used only when umi is true
#   rmdup_bam           BAM handed straight to fusion_calling; used only when umi is false
#   output_dir          root output directory
#   ref                 reference sequence path
#   umi                 selects the branch: true  -> rmdup_picard, then fusion_calling
#                                           false -> fusion_calling on rmdup_bam
#   tumor_bamdst_depth  forwarded to fusion_calling
#   project, cancer     forwarded to fusion_sum
#
# Output:
#   fusion_vcf          path String ${output_dir}/fusion/${name}.fusion.vcf
#
# Both branches end with a fusion_sum call. Previously only the non-UMI branch
# ran it, leaving the vcf_txt/longvcf outputs of fusion_calling_umi unused.
# NOTE(review): confirm that UMI samples are meant to be summarised as well.
workflow call_fusion {
    String name
    String sorted_bam
    String rmdup_bam
    String output_dir
    String ref
    Boolean umi
    String tumor_bamdst_depth
    String project
    String cancer

    if (umi) {
        call rmdup_picard {
            input:
                name=name,
                sorted_bam=sorted_bam,
                output_dir=output_dir,
                ref=ref
        }

        call fusion_calling as fusion_calling_umi {
            input:
                name=name,
                rmdup_bam=rmdup_picard.rmdup_bam,
                output_dir=output_dir,
                ref=ref,
                tumor_bamdst_depth=tumor_bamdst_depth
        }

        call fusion_sum as fusion_sum_umi {
            input:
                vcf_txt=fusion_calling_umi.vcf_txt,
                longvcf=fusion_calling_umi.longvcf,
                name=name,
                output_dir=output_dir,
                cancer=cancer,
                project=project
        }
    }

    if (!umi) {
        call fusion_calling as fusion_calling {
            input:
                name=name,
                rmdup_bam=rmdup_bam,
                output_dir=output_dir,
                ref=ref,
                tumor_bamdst_depth=tumor_bamdst_depth
        }

        call fusion_sum as fusion_sum {
            input:
                vcf_txt=fusion_calling.vcf_txt,
                longvcf=fusion_calling.longvcf,
                name=name,
                output_dir=output_dir,
                cancer=cancer,
                project=project
        }
    }

    output {
        String fusion_vcf = "${output_dir}/fusion/${name}.fusion.vcf"
    }
}