438 lines
16 KiB
Plaintext
438 lines
16 KiB
Plaintext
# Single-sample variant calling for UMI libraries.
#
# Runs VarDict twice: once on the supplied de-duplicated BAM ("1r" call) and
# once on a BAM restricted to sequences corrected from >=2 reads ("2r" call).
# The two VCFs are merged, passed through correct_genome_3rule.py, and the
# result is copied to the "somatic" VCF; the "germline" VCF is derived from
# it by vcf_filter.py with the expression INFO/AF[0] > 0.1.
#
# Inputs:
#   name        sample name, used for -N and for every output file name
#   output_dir  base directory; results are written to <output_dir>/mutation
#   rmdup_bam   BAM used for the 1r call
#   ref         reference passed to VarDict (-G) and correct_genome_3rule.py
#   bed         target regions passed to VarDict
#
# Outputs (path strings, not File objects):
#   somatic_vcf, germline_vcf
#
# NOTE(review): the 2r extraction reads <output_dir>/alignment/<name>.rmdup.bam
# rather than the rmdup_bam input -- confirm the two always refer to the same file.
task mutation_calling_umi {
    String name
    String output_dir
    String rmdup_bam
    String ref
    String bed

    command <<<
        [ -d ${output_dir}/mutation ] || mkdir ${output_dir}/mutation

        # 1r call.
        # This covers UMI families that contain only one read; discarding
        # those reads would reduce the amount of data considerably.
        # -r 3 means that three such reads must support the variant.
        # -f is the allele frequency; frequencies from the 2-read call can be
        # trusted more than those from the 1-read call.
        java -jar /dataseq/jmdna/software/VarDict-1.8.3/lib/VarDict-1.8.3.jar \
            -G ${ref} -f 0.001 -N ${name} -b ${rmdup_bam} \
            -UN -Q 20 -m 3 -r 3 -th 10 -z 1 -c 1 -S 2 -E 3 -g 4 ${bed} |
        /dataseq/jmdna/software/VarDict-1.8.3/bin/teststrandbias.R |
        /dataseq/jmdna/software/VarDict-1.8.3/bin/var2vcf_valid.pl -N ${name} -E -f 0.001 \
            > ${output_dir}/mutation/${name}.1r.snp.indel.vcf

        # Extract the sequences that were corrected from >=2 reads.
        bam_fetch.py \
            ${output_dir}/alignment/${name}.rmdup.bam \
            ${output_dir}/alignment/${name}.2r.rmdup.bam
        samtools index ${output_dir}/alignment/${name}.2r.rmdup.bam

        # 2r call: makes sure that the UMI family behind a 1r-called mutation
        # contains two reads.
        java -jar /dataseq/jmdna/software/VarDict-1.8.3/lib/VarDict-1.8.3.jar \
            -G ${ref} -f 0.0001 -N ${name}_2r \
            -b ${output_dir}/alignment/${name}.2r.rmdup.bam \
            -UN -Q 20 -m 3 -r 1 -th 10 -c 1 -S 2 -E 3 -g 4 ${bed} |
        /dataseq/jmdna/software/VarDict-1.8.3/bin/teststrandbias.R |
        /dataseq/jmdna/software/VarDict-1.8.3/bin/var2vcf_valid.pl -N ${name} -E -f 0.001 \
            > ${output_dir}/mutation/${name}.2r.snp.indel.vcf

        # Merge: mutations with AF > 0.01 from the 1-read call, plus the 2-read
        # call used to correct the low-frequency range (AF < 0.01) of the
        # 1-read call.
        correct_umi_1r_plus_2r.pl \
            ${output_dir}/mutation/${name}.1r.snp.indel.vcf \
            ${output_dir}/mutation/${name}.2r.snp.indel.vcf \
            ${output_dir}/mutation/${name}.snp.indel.raw.vcf

        correct_genome_3rule.py \
            ${output_dir}/mutation/${name}.snp.indel.raw.vcf \
            ${output_dir}/mutation/${name}.snp.indel.vcf \
            ${ref}
        cp ${output_dir}/mutation/${name}.snp.indel.vcf \
           ${output_dir}/mutation/${name}.snp.indel.somatic.vcf

        python ~/project/pipeline/workflow/script/tools/vcf_filter.py \
            -i ${output_dir}/mutation/${name}.snp.indel.somatic.vcf \
            -e 'INFO/AF[0] > 0.1' \
            -o ${output_dir}/mutation/${name}.snp.indel.germline.vcf
    >>>

    output {
        String somatic_vcf = "${output_dir}/mutation/${name}.snp.indel.somatic.vcf"
        String germline_vcf = "${output_dir}/mutation/${name}.snp.indel.germline.vcf"
    }
}
|
||
|
||
# Single-sample variant calling for non-UMI (tissue) libraries.
#
# Runs VarDict on the de-duplicated BAM, converts the result to VCF, passes it
# through correct_genome_3rule.py and copies the result to the "somatic" VCF.
# The "germline" VCF is derived from the somatic VCF by vcf_filter.py with the
# expression INFO/AF[0] > 0.1.
#
# Inputs:
#   name        sample name, used for -N and for every output file name
#   bed         target regions passed to VarDict
#   ref         reference passed to VarDict (-G) and correct_genome_3rule.py
#   output_dir  base directory; results are written to <output_dir>/mutation
#   rmdup_bam   BAM to call variants on
#
# Outputs (path strings, not File objects):
#   somatic_vcf, germline_vcf
task mutation_calling_tissue {
    String name
    String bed
    String ref
    String output_dir
    String rmdup_bam

    command <<<
        if [ ! -d ${output_dir}/mutation ]; then
            mkdir ${output_dir}/mutation
        fi

        # vardict
        java -jar /dataseq/jmdna/software/VarDict-1.8.3/lib/VarDict-1.8.3.jar \
            -G ${ref} \
            -f 0.01 \
            -N ${name} \
            -b ${rmdup_bam} \
            -UN \
            -Q 20 \
            -m 3 \
            -r 3 \
            -z 1 \
            -th 10 \
            -c 1 -S 2 -E 3 -g 4 ${bed} \
            | /dataseq/jmdna/software/VarDict-1.8.3/bin/teststrandbias.R \
            | /dataseq/jmdna/software/VarDict-1.8.3/bin/var2vcf_valid.pl -N ${name} -E -f 0.01 \
            > ${output_dir}/mutation/${name}.snp.indel.raw.vcf

        correct_genome_3rule.py \
            ${output_dir}/mutation/${name}.snp.indel.raw.vcf \
            ${output_dir}/mutation/${name}.snp.indel.vcf \
            ${ref}
        cp ${output_dir}/mutation/${name}.snp.indel.vcf ${output_dir}/mutation/${name}.snp.indel.somatic.vcf

        python ~/project/pipeline/workflow/script/tools/vcf_filter.py \
            -i ${output_dir}/mutation/${name}.snp.indel.somatic.vcf \
            -e 'INFO/AF[0] > 0.1' \
            -o ${output_dir}/mutation/${name}.snp.indel.germline.vcf
    >>>

    output {
        String somatic_vcf = "${output_dir}/mutation/${name}.snp.indel.somatic.vcf"
        # Must equal the -o path given to vcf_filter.py in the command section.
        String germline_vcf = "${output_dir}/mutation/${name}.snp.indel.germline.vcf"
    }
}
|
||
|
||
# Paired (tumor + normal) variant calling for non-UMI (tissue) libraries.
#
# Runs VarDict with both BAMs given to -b as "tumor|normal", pipes the result
# through testsomatic.R and var2vcf_paired.pl, and passes the VCF through
# correct_genome_3rule.py. vcf_filter.py is then run twice on the corrected
# VCF to split it by INFO/STATUS into a germline and a somatic VCF.
#
# Inputs:
#   name              sample name, used for -N and for every output file name
#   bed               target regions passed to VarDict
#   ref               reference passed to VarDict (-G) and correct_genome_3rule.py
#   output_dir        base directory; results are written to <output_dir>/mutation
#   tumor_rmdup_bam   first BAM of the -b pair
#   normal_rmdup_bam  second BAM of the -b pair
#
# Outputs (path strings, not File objects):
#   somatic_vcf, germline_vcf
task mutation_calling_tissue_control {
    String name
    String bed
    String ref
    String output_dir
    String tumor_rmdup_bam
    String normal_rmdup_bam

    command <<<
        [ -d ${output_dir}/mutation ] || mkdir ${output_dir}/mutation

        java -jar /dataseq/jmdna/software/VarDict-1.8.3/lib/VarDict-1.8.3.jar \
            -G ${ref} -f 0.01 -N ${name} \
            -b "${tumor_rmdup_bam}|${normal_rmdup_bam}" \
            -UN -Q 20 -m 3 -r 3 -th 20 \
            -z 1 -c 1 -S 2 -E 3 -g 4 ${bed} |
        /dataseq/jmdna/software/VarDict-1.8.3/bin/testsomatic.R |
        /dataseq/jmdna/software/VarDict-1.8.3/bin/var2vcf_paired.pl -N ${name} -f 0.01 \
            > ${output_dir}/mutation/${name}.snp.indel.raw.vcf

        correct_genome_3rule.py \
            ${output_dir}/mutation/${name}.snp.indel.raw.vcf \
            ${output_dir}/mutation/${name}.snp.indel.vcf \
            ${ref}

        # Records whose STATUS is Germline.
        python ~/project/pipeline/workflow/script/tools/vcf_filter.py \
            -i ${output_dir}/mutation/${name}.snp.indel.vcf \
            -o ${output_dir}/mutation/${name}.snp.indel.germline.vcf \
            -e 'INFO/STATUS="Germline"'

        # Records whose STATUS is StrongSomatic, or LikelySomatic with the
        # first sample's AF more than three times the second sample's AF.
        python ~/project/pipeline/workflow/script/tools/vcf_filter.py \
            -i ${output_dir}/mutation/${name}.snp.indel.vcf \
            -o ${output_dir}/mutation/${name}.snp.indel.somatic.vcf \
            -e 'INFO/STATUS="StrongSomatic" | ( INFO/STATUS="LikelySomatic" && FORMAT/AF[0] > 3*FORMAT/AF[1] )'
    >>>

    output {
        String somatic_vcf = "${output_dir}/mutation/${name}.snp.indel.somatic.vcf"
        String germline_vcf = "${output_dir}/mutation/${name}.snp.indel.germline.vcf"
    }
}
|
||
|
||
# Paired (tumor + normal) variant calling for UMI libraries.
#
# Steps, in order:
#   1. VarDict on the normal BAM                -> <name>.snp.indel.raw_germline.vcf
#   2. VarDict on the tumor BAM ("1r" call)     -> <name>.1r.snp.indel.vcf
#   3. extract sequences corrected from >=2 reads and call again ("2r" call)
#                                               -> <name>.2r.snp.indel.vcf
#   4. merge the 1r and 2r calls                -> <name>.snp.indel.pre_raw.vcf
#   5. remove variant sites found in the normal -> <name>.snp.indel.raw_somaitc.vcf
#   6. correct_genome_3rule.py on both raw VCFs -> germline.vcf / somatic.vcf
#
# Inputs:
#   name              sample name, used for -N and for every output file name
#   bed               target regions passed to VarDict
#   ref               reference passed to VarDict (-G) and correct_genome_3rule.py
#   output_dir        base directory; results are written to <output_dir>/mutation
#   tumor_rmdup_bam   BAM used for the tumor 1r call
#   normal_rmdup_bam  BAM used for the normal call
#
# Outputs (path strings, not File objects):
#   somatic_vcf, germline_vcf
#
# NOTE(review): the 2r extraction reads <output_dir>/alignment/<name>.rmdup.bam
# rather than the tumor_rmdup_bam input -- confirm they are the same file.
# NOTE(review): this task calls func_fetch_bam.py and
# filter_snpindel_umi_1r_plus_2r.pl where mutation_calling_umi calls
# bam_fetch.py and correct_umi_1r_plus_2r.pl -- confirm both sets exist.
# NOTE(review): "raw_somaitc" looks like a misspelling of "raw_somatic"; it is
# used consistently for the intermediate file, so it is left unchanged.
task mutation_calling_umi_control {
    String name
    String bed
    String ref
    String output_dir
    String tumor_rmdup_bam
    String normal_rmdup_bam

    command <<<
        [ -d ${output_dir}/mutation ] || mkdir ${output_dir}/mutation

        # Control (normal) sample.
        java -jar /dataseq/jmdna/software/VarDict-1.8.3/lib/VarDict-1.8.3.jar \
            -G ${ref} -f 0.01 -N ${name} -b ${normal_rmdup_bam} \
            -UN -Q 20 -m 3 -r 3 -th 10 \
            -c 1 -S 2 -E 3 -g 4 ${bed} |
        /dataseq/jmdna/software/VarDict-1.8.3/bin/teststrandbias.R |
        /dataseq/jmdna/software/VarDict-1.8.3/bin/var2vcf_valid.pl -N ${name} -E -f 0.01 \
            > ${output_dir}/mutation/${name}.snp.indel.raw_germline.vcf

        # Test (tumor) sample, 1r call.
        java -jar /dataseq/jmdna/software/VarDict-1.8.3/lib/VarDict-1.8.3.jar \
            -G ${ref} -f 0.001 -N ${name} -b ${tumor_rmdup_bam} \
            -UN -Q 20 -m 3 -r 3 -th 10 -c 1 -S 2 -E 3 -g 4 ${bed} |
        /dataseq/jmdna/software/VarDict-1.8.3/bin/teststrandbias.R |
        /dataseq/jmdna/software/VarDict-1.8.3/bin/var2vcf_valid.pl -N ${name} -E -f 0.001 \
            > ${output_dir}/mutation/${name}.1r.snp.indel.vcf

        # Extract the sequences that were corrected from >=2 reads.
        func_fetch_bam.py \
            ${output_dir}/alignment/${name}.rmdup.bam \
            ${output_dir}/alignment/${name}.2r.rmdup.bam
        samtools index ${output_dir}/alignment/${name}.2r.rmdup.bam

        # 2r call: makes sure that the UMI family behind a 1r-called mutation
        # contains two reads.
        java -jar /dataseq/jmdna/software/VarDict-1.8.3/lib/VarDict-1.8.3.jar \
            -G ${ref} -f 0.0001 -N ${name}_2r \
            -b ${output_dir}/alignment/${name}.2r.rmdup.bam \
            -UN -Q 20 -m 3 -r 1 -th 10 -c 1 -S 2 -E 3 -g 4 ${bed} |
        /dataseq/jmdna/software/VarDict-1.8.3/bin/teststrandbias.R |
        /dataseq/jmdna/software/VarDict-1.8.3/bin/var2vcf_valid.pl -N ${name} -E -f 0.001 \
            > ${output_dir}/mutation/${name}.2r.snp.indel.vcf

        # Merge: mutations with AF > 0.01 from the 1-read call, plus the 2-read
        # call used to correct the low-frequency range (AF < 0.01) of the
        # 1-read call.
        filter_snpindel_umi_1r_plus_2r.pl \
            ${output_dir}/mutation/${name}.1r.snp.indel.vcf \
            ${output_dir}/mutation/${name}.2r.snp.indel.vcf \
            ${output_dir}/mutation/${name}.snp.indel.pre_raw.vcf

        # Remove the variant sites that are present in the normal sample.
        correct_umi_subnormal.pl \
            ${output_dir}/mutation/${name}.snp.indel.pre_raw.vcf \
            ${output_dir}/mutation/${name}.snp.indel.raw_germline.vcf \
            ${output_dir}/mutation/${name}.snp.indel.raw_somaitc.vcf

        correct_genome_3rule.py \
            ${output_dir}/mutation/${name}.snp.indel.raw_germline.vcf \
            ${output_dir}/mutation/${name}.snp.indel.germline.vcf \
            ${ref}
        correct_genome_3rule.py \
            ${output_dir}/mutation/${name}.snp.indel.raw_somaitc.vcf \
            ${output_dir}/mutation/${name}.snp.indel.somatic.vcf \
            ${ref}
    >>>

    output {
        String somatic_vcf = "${output_dir}/mutation/${name}.snp.indel.somatic.vcf"
        String germline_vcf = "${output_dir}/mutation/${name}.snp.indel.germline.vcf"
    }
}
|
||
|
||
# Annotate a VCF with ANNOVAR (table_annovar.pl), build hg19.
#
# Inputs:
#   prefix      output file prefix; results go to <output_dir>/mutation/<prefix>.*
#   output_dir  base directory
#   ref         declared but not referenced by the command section
#   vcf         path of the VCF to annotate
#
# Output (path string, not a File object):
#   anno        <output_dir>/mutation/<prefix>.hg19_multianno.txt
#
# The -protocol, -operation and -argument lists must stay the same length
# (11 entries each here); only the first protocol (refGene) receives extra
# arguments, the remaining -argument slots are empty.
task annovar {
    String prefix
    String output_dir
    String ref
    String vcf

    command <<<
        [ -d ${output_dir}/mutation ] || mkdir ${output_dir}/mutation

        table_annovar.pl ${vcf} /dataseq/jmdna/software/annovar/humandb/ \
            -buildver hg19 \
            -nastring . \
            -vcfinput \
            -remove \
            -otherinfo \
            -protocol refGene,avsnp150,cosmic91,clinvar_20220320,1000g2015aug_all,1000g2015aug_eas,esp6500siv2_all,exac03nontcga,gnomad_genome,dbnsfp35c,cytoBand \
            -argument '-splicing_threshold 2 -hgvs',,,,,,,,,, \
            -intronhgvs 50 \
            -operation g,f,f,f,f,f,f,f,f,f,r \
            -outfile ${output_dir}/mutation/${prefix} \
            -dot2underline
    >>>

    output {
        String anno = "${output_dir}/mutation/${prefix}.hg19_multianno.txt"
    }
}
|
||
|
||
# Filter annotated variants for UMI libraries.
#
# Step 1 runs filter_snpindel.pl on the annotation table in mode "c" and is
# given three paths under <output_dir>/mutation (presumably the somatic,
# germline and tag outputs -- confirm against the script).
# Step 2 runs filter_snpindel_umi_correct_overlap_reads.py on the somatic
# pre-filtered table from step 1 together with the tumor BAM and writes the
# final somatic table.
#
# Inputs:
#   name             sample name, used in every file name
#   anno             annotation table passed to filter_snpindel.pl
#   project          second positional argument of filter_snpindel.pl
#   output_dir       base directory; results are written to <output_dir>/mutation
#   tumor_rmdup_bam  BAM passed to the overlap-read correction script
#
# Outputs (path strings, not File objects):
#   snvindel_filtered  final somatic table written by step 2
#   germline_filtered  germline table written by step 1
#
# Fixes relative to the previous revision:
#   * step 2 now reads the exact file step 1 writes (".filtered.pre.txt");
#     it used to read "_filtered_pre.txt", which nothing produced;
#   * the declared outputs now name the files the command actually writes
#     (they used to point at "Somatic.anno..." / "Germline.anno..." paths);
#   * the filter script is invoked as filter_snpindel.pl, matching
#     filter_tissue.
#     NOTE(review): confirm that no extension-less "filter_snpindel" wrapper
#     was intended.
task filter_umi {
    String name
    String anno
    String project
    String output_dir
    String tumor_rmdup_bam

    command <<<
        if [ ! -d ${output_dir}/mutation ]; then
            mkdir ${output_dir}/mutation
        fi

        filter_snpindel.pl \
            ${anno} \
            ${project} \
            c \
            ${output_dir}/mutation/${name}.snp.indel.somatic.hg19_multianno.filtered.pre.txt \
            ${output_dir}/mutation/${name}.snp.indel.germline.hg19_multianno.filtered.pre.txt \
            ${output_dir}/mutation/${name}.snp.indel.hg19_multianno.tag.txt

        filter_snpindel_umi_correct_overlap_reads.py \
            ${output_dir}/mutation/${name}.snp.indel.somatic.hg19_multianno.filtered.pre.txt \
            ${tumor_rmdup_bam} \
            ${output_dir}/mutation/${name}.snp.indel.somatic.hg19_multianno_filtered.txt
    >>>

    output {
        String snvindel_filtered = "${output_dir}/mutation/${name}.snp.indel.somatic.hg19_multianno_filtered.txt"
        String germline_filtered = "${output_dir}/mutation/${name}.snp.indel.germline.hg19_multianno.filtered.pre.txt"
    }
}
|
||
|
||
# Filter annotated variants for non-UMI (tissue) libraries.
#
# Runs filter_snpindel.pl on the annotation table in mode "t" and gives it
# three paths under <output_dir>/mutation (presumably the somatic, germline
# and tag outputs -- confirm against the script).
#
# Inputs:
#   name             sample name, used in every file name
#   anno             annotation table passed to filter_snpindel.pl
#   project          second positional argument of filter_snpindel.pl
#   output_dir       base directory; results are written to <output_dir>/mutation
#   tumor_rmdup_bam  declared but not referenced by the command section
#
# Outputs (path strings, not File objects):
#   snvindel_filtered, germline_filtered
#
# Fix relative to the previous revision: the declared outputs now name the
# files passed to filter_snpindel.pl; they used to point at
# "Somatic.anno.hg19_multianno_filtered.txt" /
# "Germline.anno.hg19_multianno_filtered.txt", which the command never writes.
task filter_tissue {
    String name
    String anno
    String project
    String output_dir
    String tumor_rmdup_bam

    command <<<
        if [ ! -d ${output_dir}/mutation ]; then
            mkdir ${output_dir}/mutation
        fi

        filter_snpindel.pl \
            ${anno} \
            ${project} \
            t \
            ${output_dir}/mutation/${name}.snp.indel.somatic.hg19_multianno.filtered.txt \
            ${output_dir}/mutation/${name}.snp.indel.germline.hg19_multianno.filtered.txt \
            ${output_dir}/mutation/${name}.snp.indel.hg19_multianno.tag.txt
    >>>

    output {
        String snvindel_filtered = "${output_dir}/mutation/${name}.snp.indel.somatic.hg19_multianno.filtered.txt"
        String germline_filtered = "${output_dir}/mutation/${name}.snp.indel.germline.hg19_multianno.filtered.txt"
    }
}
|
||
|
||
# Variant-calling workflow.
#
# Chooses one of four calling tasks from two switches:
#   * whether `normal` is defined (paired vs. single-sample), and
#   * `umi` (UMI library vs. non-UMI library),
# then annotates the resulting somatic and germline VCFs with ANNOVAR.
#
# Intended pipeline: mutation_calling => annovar => filter.
# As written, this workflow calls only the calling and annovar tasks; the
# filter tasks are not called here.
#
# Inputs:
#   tumor             tumor sample name (used as `name` in the calling tasks)
#   tumor_rmdup_bam   tumor BAM path
#   normal            optional normal sample name; its presence selects paired mode
#   normal_rmdup_bam  optional normal BAM path
#   umi               true for UMI libraries
#   output_dir        base output directory
#   ref               reference path
#   bed               target regions path
#
# Outputs (path strings, not File objects):
#   somatic_vcf, germline_vcf
#
# NOTE(review): paired mode is gated on defined(normal) only, while the
# optional normal_rmdup_bam is passed to a required String input -- confirm
# both are always supplied together.
workflow call_mutation {

    String tumor
    String tumor_rmdup_bam
    String? normal
    String? normal_rmdup_bam
    Boolean umi
    String output_dir
    String ref
    String bed

    # ---------------------------------------------------------------- single sample
    if (!defined(normal)) {
        if (umi) {
            call mutation_calling_umi {
                input:
                    name = tumor,
                    rmdup_bam = tumor_rmdup_bam,
                    bed = bed,
                    ref = ref,
                    output_dir = output_dir
            }
            call annovar as anno_somatic_umi {
                input:
                    vcf = mutation_calling_umi.somatic_vcf,
                    prefix = "${tumor}.snp.indel.somatic",
                    ref = ref,
                    output_dir = output_dir
            }
            call annovar as anno_germline_umi {
                input:
                    vcf = mutation_calling_umi.germline_vcf,
                    prefix = "${tumor}.snp.indel.germline",
                    ref = ref,
                    output_dir = output_dir
            }
        }

        if (!umi) {
            call mutation_calling_tissue {
                input:
                    name = tumor,
                    rmdup_bam = tumor_rmdup_bam,
                    bed = bed,
                    ref = ref,
                    output_dir = output_dir
            }
            call annovar as anno_somatic_tissue {
                input:
                    vcf = mutation_calling_tissue.somatic_vcf,
                    prefix = "${tumor}.snp.indel.somatic",
                    ref = ref,
                    output_dir = output_dir
            }
            call annovar as anno_germline_tissue {
                input:
                    vcf = mutation_calling_tissue.germline_vcf,
                    prefix = "${tumor}.snp.indel.germline",
                    ref = ref,
                    output_dir = output_dir
            }
        }
    }

    # ---------------------------------------------------------------- paired samples
    if (defined(normal)) {
        if (umi) {
            call mutation_calling_umi_control {
                input:
                    name = tumor,
                    tumor_rmdup_bam = tumor_rmdup_bam,
                    normal_rmdup_bam = normal_rmdup_bam,
                    bed = bed,
                    ref = ref,
                    output_dir = output_dir
            }
            call annovar as anno_somatic_umi_control {
                input:
                    vcf = mutation_calling_umi_control.somatic_vcf,
                    prefix = "${tumor}.snp.indel.somatic",
                    ref = ref,
                    output_dir = output_dir
            }
            call annovar as anno_germline_umi_control {
                input:
                    vcf = mutation_calling_umi_control.germline_vcf,
                    prefix = "${tumor}.snp.indel.germline",
                    ref = ref,
                    output_dir = output_dir
            }
        }

        if (!umi) {
            call mutation_calling_tissue_control {
                input:
                    name = tumor,
                    tumor_rmdup_bam = tumor_rmdup_bam,
                    normal_rmdup_bam = normal_rmdup_bam,
                    bed = bed,
                    ref = ref,
                    output_dir = output_dir
            }
            call annovar as anno_somatic_tissue_control {
                input:
                    vcf = mutation_calling_tissue_control.somatic_vcf,
                    prefix = "${tumor}.snp.indel.somatic",
                    ref = ref,
                    output_dir = output_dir
            }
            call annovar as anno_germline_tissue_control {
                input:
                    vcf = mutation_calling_tissue_control.germline_vcf,
                    prefix = "${tumor}.snp.indel.germline",
                    ref = ref,
                    output_dir = output_dir
            }
        }
    }

    # The paths are rebuilt from the inputs rather than taken from a call,
    # so they are the same whichever branch ran.
    output {
        String somatic_vcf = "${output_dir}/mutation/${tumor}.snp.indel.somatic.vcf"
        String germline_vcf = "${output_dir}/mutation/${tumor}.snp.indel.germline.vcf"
    }
}